diff options
authorSanthosh Thottingal <>2009-10-03 12:01:24 +0530
committerSanthosh Thottingal <>2009-10-03 12:01:24 +0530
commite4c73b927e00bfaf5d797fbfadae191f6375cac0 (patch)
parent7c19f25ef4e8a18709eed13545d62c76fa299ce6 (diff)
Adding inkscape hyphenation extension
-rw-r--r--hyphenation/inkscape/inkscape-hyphenation.zipbin0 -> 61722 bytes
22 files changed, 12344 insertions, 5 deletions
diff --git a/hyphenation/inkscape/README b/hyphenation/inkscape/README
new file mode 100644
index 0000000..152287f
--- /dev/null
+++ b/hyphenation/inkscape/README
@@ -0,0 +1,24 @@
+Inkscape Hyphenation Extension
+1. How to install?
+ Download the extension archive (inkscape-hyphenation.zip). On GNU/Linux machines, extract the zip file and copy its contents to the /usr/share/inkscape/extensions folder. On Windows, extract it to the [inkscape installation directory]\extensions folder.
+After this, close and reopen Inkscape. You will see a menu item named Hyphenate in the Effects->Text menu.
+2. How to use?
+ In the document, add a text field and enter text in any of the supported languages. Select the text and apply hyphenation with Effects->Text->Hyphenate. Then change the alignment of the text to justify. You will see the text hyphenated and occupying as much of the text field as possible.
+3. How to report a problem?
+ Mail to santhosh dot thottingal at gmail dot com
+4. What are all the supported Languages?
+ English, Hindi, Malayalam, Tamil, Telugu, Oriya, Bengali, Panjabi, Gujarati, Marathi, Kannada, Assamese
+5. What is the license of this extension?
+ This extension is licensed under GNU GPL version 3 or later version.
+6. What is hyphenation and where can I get more information about that?
+ *
+ *
diff --git a/hyphenation/inkscape/README~ b/hyphenation/inkscape/README~
new file mode 100644
index 0000000..4efe63c
--- /dev/null
+++ b/hyphenation/inkscape/README~
@@ -0,0 +1,24 @@
+Inkscape Hyphenation Extension
+1. How to install?
+ Download the extension archive (inkscape-hyphenation.zip). On GNU/Linux machines, extract this file to the /usr/share/inkscape/extensions folder. On Windows, extract it to the [inkscape installation directory]\extensions folder.
+After this, close and reopen Inkscape. You will see a menu item named Hyphenate in the Effects->Text menu.
+2. How to use?
+ In the document, add a text field and enter text in any of the supported languages. Select the text and apply hyphenation with Effects->Text->Hyphenate. Then change the alignment of the text to justify. You will see the text hyphenated and occupying as much of the text field as possible.
+3. How to report a problem?
+ Mail to santhosh dot thottingal at gmail dot com
+4. What are all the supported Languages?
+ English, Hindi, Malayalam, Tamil, Telugu, Oriya, Bengali, Panjabi, Gujarati, Marathi, Kannada, Assamese
+5. What is the license of this extension?
+ This extension is licensed under GNU GPL version 3 or later version.
+6. What is hyphenation and where can I get more information about that?
+ *
+ *
diff --git a/hyphenation/inkscape/extensions/hyphenator/ b/hyphenation/inkscape/extensions/hyphenator/
new file mode 100755
index 0000000..80e71a4
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/
@@ -0,0 +1,3 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+from hyphenator import *
diff --git a/hyphenation/inkscape/extensions/hyphenator/ b/hyphenation/inkscape/extensions/hyphenator/
new file mode 100755
index 0000000..3839853
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/
@@ -0,0 +1,249 @@
+# -*- coding: utf-8 -*-
+"""
+This is a pure Python module to hyphenate text.
+It is inspired by Ruby's Text::Hyphen, but currently reads standard *.dic files,
+that must be installed separately.
+In the future it would be nice if dictionaries could be distributed together with
+this module, in a slightly prepared form, like in Ruby's Text::Hyphen.
+Wilbert Berendsen, March 2008
+License: LGPL.
+"""
+import sys
+import re
+import os
+import string
+import langdetect
+#__all__ = ("Hyphenator")
+# Cache of Hyph_dict objects, keyed by the path of the .dic file each was read from.
+hdcache = {}
+# Precompiled helpers for reading pattern lines: parse_hex substitutes '^^hh' escapes, parse splits a pattern into (digit, character) pairs.
+parse_hex = re.compile(r'\^{2}([0-9a-f]{2})').sub
+parse = re.compile(r'(\d?)(\D?)').findall
def hexrepl(matchObj):
    """
    Return the character encoded by a ``^^hh`` escape.

    Used as the replacement callback for parse_hex: group 1 of the match
    holds the two hexadecimal digits that follow ``^^`` in a pattern line.
    """
    code_point = int(matchObj.group(1), 16)
    try:
        return unichr(code_point)
    except NameError:
        # No unichr() on this interpreter; chr() already returns text there.
        return chr(code_point)
class parse_alt(object):
    """
    Parse nonstandard hyphen pattern alternative (the part after the '/').

    An instance is used in place of ``int`` when the digits of a pattern are
    converted: even values come back as plain ints, odd values come back as
    a dint carrying the tuple (change, index, cut) for that position.
    """
    def __init__(self, pat, alt):
        """
        pat: the pattern text in front of the '/'
        alt: the alternative, either 'change' or 'change,index,cut'
        """
        fields = alt.split(',')
        self.change = fields[0]
        if len(fields) > 2:
            self.index = int(fields[1])
            self.cut = int(fields[2]) + 1
        else:
            # No explicit position given: start at 1 and derive the cut
            # from the pattern length without its digits and dots.
            self.index = 1
            self.cut = len(re.sub(r'[\d\.]', '', pat)) + 1
        if pat.startswith('.'):
            # A leading dot in the pattern shifts the index by one.
            self.index += 1

    def __call__(self, val):
        """Convert one pattern value; every call decrements the index."""
        self.index -= 1
        val = int(val)
        if val & 1:
            # Odd value: attach the alternative as seen from this position.
            return dint(val, (self.change, self.index, self.cut))
        return val
class dint(int):
    """
    Just an int some other data can be stuck to in a data attribute.

    value: the integer value
    data:  the object to attach (default None)
    ref:   another dint; when given, its data is used instead of ``data``
    """
    def __new__(cls, value, data=None, ref=None):
        obj = int.__new__(cls, value)
        # isinstance() rather than a truth test: a dint whose value is 0 is
        # falsy but still carries data that should be copied.
        if isinstance(ref, dint):
            obj.data = ref.data
        else:
            obj.data = data
        return obj
class Hyph_dict(object):
    """
    Reads a hyph_*.dic file and stores the hyphenation patterns.

    Parameters:
    -filename : filename of hyph_*.dic to read
    """
    def __init__(self, filename):
        self.patterns = {}
        f = open(filename)
        try:
            # The first line names the encoding used to decode every
            # following line, optionally prefixed with 'charset '.
            charset = f.readline().strip()
            if charset.startswith('charset '):
                charset = charset[8:].strip()

            for pat in f:
                pat = pat.decode(charset).strip()
                # skip blank lines and '%' comment lines
                if not pat or pat[0] == '%':
                    continue
                # replace ^^hh with the real character
                pat = parse_hex(hexrepl, pat)
                # read nonstandard hyphen alternatives
                if '/' in pat:
                    pat, alt = pat.split('/', 1)
                    factory = parse_alt(pat, alt)
                else:
                    factory = int
                tag, value = zip(*[(s, factory(i or "0")) for i, s in parse(pat)])
                # if only zeros, skip this pattern
                if max(value) == 0:
                    continue
                # chop zeros from beginning and end, and store start offset.
                start, end = 0, len(value)
                while not value[start]:
                    start += 1
                while not value[end-1]:
                    end -= 1
                self.patterns[''.join(tag)] = start, value[start:end]
        finally:
            # close the file even when a line fails to decode or parse
            f.close()
        self.cache = {}
        # max() of an empty sequence raises; a file without usable patterns
        # simply never matches anything.
        if self.patterns:
            self.maxlen = max(map(len, self.patterns.keys()))
        else:
            self.maxlen = 0

    def positions(self, word):
        """
        Returns a list of positions where the word can be hyphenated.
        E.g. for the dutch word 'lettergrepen' this method returns
        the list [3, 6, 9].

        Each position is a 'data int' (dint) with a data attribute.
        If the data attribute is not None, it contains a tuple with
        information about nonstandard hyphenation at that point:
        (change, index, cut)

        change: is a string like 'ff=f', that describes how hyphenation
            should take place.
        index: where to substitute the change, counting from the current
            point
        cut: how many characters to remove while substituting the nonstandard
            hyphenation
        """
        word = word.lower()
        points = self.cache.get(word)
        if points is None:
            # patterns are matched against the word wrapped in dots
            prepWord = '.%s.' % word
            res = [0] * (len(prepWord) + 1)
            for i in range(len(prepWord) - 1):
                for j in range(i + 1, min(i + self.maxlen, len(prepWord)) + 1):
                    p = self.patterns.get(prepWord[i:j])
                    if p:
                        offset, value = p
                        s = slice(i + offset, i + offset + len(value))
                        # keep the highest value seen for every position
                        res[s] = map(max, value, res[s])

            # odd values mark hyphenation points; i - 1 undoes the leading dot
            points = [dint(i - 1, ref=r) for i, r in enumerate(res) if r % 2]
            self.cache[word] = points
        return points
class Hyphenator(object):
    """
    Provides methods to hyphenate strings in various ways, using the
    patterns of a hyph_*.dic file.

    No dictionary is chosen at construction time: loadHyphDict(lang) loads
    rules/hyph_<lang>.dic from this module's directory, and hyphenate()
    does so for every word according to its detected language.

    -left: make the first syllable not shorter than this
    -right: make the last syllable not shorter than this

    left and right may also later be changed:

      h = Hyphenator()
      h.left = 1
    """
    left = 2
    right = 2

    def __init__(self):
        self.template = os.path.join(os.path.dirname(__file__), 'hyphenator.html')
        # Hyph_dict currently in use; set by loadHyphDict()
        self.hd = None

    def loadHyphDict(self, lang, cache=True):
        """
        Make the dictionary for lang (e.g. 'ml_IN') the current one.
        If cache is true (default), use a cached copy of the dic file,
        if possible.
        """
        filename = os.path.join(os.path.dirname(__file__), "rules/hyph_" + lang + ".dic")
        if not cache or filename not in hdcache:
            hdcache[filename] = Hyph_dict(filename)
        self.hd = hdcache[filename]

    def positions(self, word):
        """
        Returns a list of positions where the word can be hyphenated.
        See also Hyph_dict.positions. The points that are too far to
        the left or right are removed.
        """
        right = len(word) - self.right
        return [i for i in self.hd.positions(word) if self.left <= i <= right]

    def iterate(self, word):
        """
        Iterate over all hyphenation possibilities, the longest first.
        """
        if isinstance(word, str):
            word = word.decode('latin1')
        for p in reversed(self.positions(word)):
            if p.data:
                # get the nonstandard hyphenation data
                change, index, cut = p.data
                if word.isupper():
                    change = change.upper()
                c1, c2 = change.split('=')
                yield word[:p+index] + c1, c2 + word[p+index+cut:]
            else:
                yield word[:p], word[p:]

    def wrap(self, word, width, hyphen='-'):
        """
        Return the longest possible first part and the last part of the
        hyphenated word. The first part has the hyphen already attached.
        Returns None, if there is no hyphenation point before width, or
        if the word could not be hyphenated.
        """
        width -= len(hyphen)
        for w1, w2 in self.iterate(word):
            if len(w1) <= width:
                return w1 + hyphen, w2

    def inserted(self, word, hyphen='-'):
        """
        Returns the word as a string with all the possible hyphens inserted.
        E.g. for the dutch word 'lettergrepen' this method returns
        the string 'let-ter-gre-pen'. The hyphen string to use can be
        given as the second parameter, that defaults to '-'.
        """
        if isinstance(word, str):
            word = word.decode('latin1')
        l = list(word)
        # work from the end so earlier positions stay valid while inserting
        for p in reversed(self.positions(word)):
            if p.data:
                # get the nonstandard hyphenation data
                change, index, cut = p.data
                if word.isupper():
                    change = change.upper()
                l[p + index : p + index + cut] = change.replace('=', hyphen)
            else:
                l.insert(p, hyphen)
        return ''.join(l)

    def hyphenate(self, text, hyphen="&shy;"):
        """
        Return text with hyphen inserted at every hyphenation point.

        The text is split on single spaces. Each word whose language is
        detected is hyphenated with that language's dictionary; any other
        word is passed through unchanged. The words are joined with the
        same single spaces again, so the spacing of text is preserved.
        """
        pieces = []
        for word in text.split(" "):
            lang = langdetect.detect_lang(word)
            if lang and word:
                self.loadHyphDict(lang)
                word = self.inserted(word, hyphen)
            pieces.append(word)
        # join() keeps a separator after words that were not hyphenated and
        # does not add one after the last word
        return " ".join(pieces)
diff --git a/hyphenation/inkscape/extensions/hyphenator/ b/hyphenation/inkscape/extensions/hyphenator/
new file mode 100644
index 0000000..db3ba5f
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/
@@ -0,0 +1,38 @@
+# Spellchecker with language detection
+# coding: utf-8
+# Copyright © 2008 Santhosh Thottingal
+# Released under the GPLV3+ license
+import string
# Inclusive character ranges, one per script, paired with the language code
# that detect_lang() returns for a letter inside the range.
_SCRIPT_RANGES = (
    (u'ം', u'൯', "ml_IN"),
    (u'ঁ', u'৺', "bn_IN"),
    (u'ँ', u'ॿ', "hi_IN"),
    (u'ઁ', u'૱', "gu_IN"),
    (u'ਁ', u'ੴ', "pa_IN"),
    (u'ಂ', u'ೲ', "kn_IN"),
    (u'ଁ', u'ୱ', "or_IN"),
    (u'ஂ', u'௺', "ta_IN"),
    (u'ఁ', u'౯', "te_IN"),
)


def detect_lang(word):
    """
    Return the language code for word, or None if it cannot be detected.

    The decision is made on the first alphabetic character that falls in
    one of the known script ranges, or that sorts at or below 'z' (which
    gives "en_US"). Non-alphabetic characters, punctuation included, are
    skipped, as are letters outside every range. An empty or None word
    gives None.
    """
    if not word:
        return None
    for letter in word:
        if not letter.isalpha():
            continue
        for first, last, lang in _SCRIPT_RANGES:
            if first <= letter <= last:
                return lang
        if letter <= u'z':
            return "en_US"
    return None
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_as_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_as_IN.dic
new file mode 100755
index 0000000..322d905
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_as_IN.dic
@@ -0,0 +1,100 @@
+% Hyphenation for Assamese
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break after khanda ta.
+% Do not break before chandrabindu, anusvara, visarga, avagraha,
+% nukta and au length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_bn_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_bn_IN.dic
new file mode 100755
index 0000000..a6842cc
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_bn_IN.dic
@@ -0,0 +1,100 @@
+% Hyphenation for Bengali
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break after khanda ta.
+% Do not break before chandrabindu, anusvara, visarga, avagraha,
+% nukta and au length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_en_US.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_en_US.dic
new file mode 100755
index 0000000..d91204b
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_en_US.dic
@@ -0,0 +1,9784 @@
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_gu_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_gu_IN.dic
new file mode 100755
index 0000000..9b0b80c
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_gu_IN.dic
@@ -0,0 +1,93 @@
+% Hyphenation for Gujarati
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel but not before.
+% Break before or after any consonant.
+% Do not break before chandrabindu, anusvara, visarga, avagraha
+% and accents.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_hi_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_hi_IN.dic
new file mode 100755
index 0000000..a0c7518
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_hi_IN.dic
@@ -0,0 +1,97 @@
+% Hyphenation for Hindi
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel but not before.
+% Break before or after any consonant.
+% Do not break before chandrabindu, anusvara, visarga, avagraha
+% and accents.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_kn_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_kn_IN.dic
new file mode 100755
index 0000000..fa358c3
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_kn_IN.dic
@@ -0,0 +1,100 @@
+% Hyphenation for Kannada
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break before anusvara, visarga, avagraha,
+% length mark and ai length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ml_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ml_IN.dic
new file mode 100755
index 0000000..d33a26a
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ml_IN.dic
@@ -0,0 +1,115 @@
+% Hyphenation for Malayalam
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break before anusvara, visarga
+% Do not break either side of virama (may be within conjunct).
+% Do not break left side of chillu
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_mr_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_mr_IN.dic
new file mode 100755
index 0000000..3e98bfa
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_mr_IN.dic
@@ -0,0 +1,97 @@
+% Hyphenation for Marathi
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel but not before.
+% Break before or after any consonant.
+% Do not break before chandrabindu, anusvara, visarga, avagraha
+% and accents.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_or_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_or_IN.dic
new file mode 100755
index 0000000..7d73740
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_or_IN.dic
@@ -0,0 +1,92 @@
+% Hyphenation for Oriya
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break before anusvara, visarga and length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_pa_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_pa_IN.dic
new file mode 100755
index 0000000..f7643c4
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_pa_IN.dic
@@ -0,0 +1,88 @@
+% Hyphenation for Panjabi
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel but not before.
+% Break before or after any consonant.
+% Do not break before chandrabindu, anusvara, visarga, avagraha
+% and accents.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ta_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ta_IN.dic
new file mode 100755
index 0000000..65bf2fa
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_ta_IN.dic
@@ -0,0 +1,76 @@
+% Hyphenation for Tamil
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break before anusvara, visarga and length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/hyphenator/rules/hyph_te_IN.dic b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_te_IN.dic
new file mode 100755
index 0000000..ef3ca2e
--- /dev/null
+++ b/hyphenation/inkscape/extensions/hyphenator/rules/hyph_te_IN.dic
@@ -0,0 +1,99 @@
+% Hyphenation for Telugu
+% Copyright (C) 2008-2009 Santhosh Thottingal <>
+% This library is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public
+% License as published by the Free Software Foundation;
+% version 3 or later version of the License.
+% This library is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% Lesser General Public License for more details.
+% You should have received a copy of the GNU General Public
+% License along with this library; if not, write to the Free Software
+% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+% Do not break either side of ZERO-WIDTH JOINER (U+200D)
+% Break on both sides of ZERO-WIDTH NON JOINER (U+200C)
+% Break before or after any independent vowel.
+% Break after any dependent vowel, but not before.
+% Break before or after any consonant.
+% Do not break before chandrabindu, anusvara, visarga,
+% length mark and ai length mark.
+% Do not break either side of virama (may be within conjunct).
diff --git a/hyphenation/inkscape/extensions/text_hyphenate.inx b/hyphenation/inkscape/extensions/text_hyphenate.inx
new file mode 100644
index 0000000..1250d0c
--- /dev/null
+++ b/hyphenation/inkscape/extensions/text_hyphenate.inx
@@ -0,0 +1,15 @@
+ <_name>Hyphenate</_name>
+ <id>org.inkscape.text.hyphenate</id>
+ <dependency type="executable" location="extensions"></dependency>
+ <dependency type="executable" location="extensions"></dependency>
+ <effect>
+ <object-type>all</object-type>
+ <effects-menu>
+ <submenu _name="Text"/>
+ </effects-menu>
+ </effect>
+ <script>
+ <command reldir="extensions" interpreter="python"></command>
+ </script>
diff --git a/hyphenation/inkscape/extensions/ b/hyphenation/inkscape/extensions/
new file mode 100644
index 0000000..2e7e10f
--- /dev/null
+++ b/hyphenation/inkscape/extensions/
@@ -0,0 +1,9 @@
+import chardataeffect, inkex, string
+from hyphenator import Hyphenator
class C(chardataeffect.CharDataEffect):
    """Character-data effect that inserts soft hyphens into text."""

    def process_chardata(self, text, line=False, par=False):
        """
        Return text with U+00AD SOFT HYPHEN inserted at the points found
        by Hyphenator.hyphenate(). line and par are not used here.
        """
        # insert softhyphens
        return Hyphenator().hyphenate(text, u'\u00AD')
+c = C()
diff --git a/hyphenation/inkscape/ b/hyphenation/inkscape/
new file mode 100644
index 0000000..eebe76d
--- /dev/null
+++ b/hyphenation/inkscape/
Binary files differ
diff --git a/hyphenation/tests/malayalam-out.txt b/hyphenation/tests/malayalam-out.txt
index acf60f9..9fdc1af 100644
--- a/hyphenation/tests/malayalam-out.txt
+++ b/hyphenation/tests/malayalam-out.txt
@@ -1,4 +1,3 @@
@@ -227,5 +226,462 @@
diff --git a/hyphenation/tests/malayalam.txt b/hyphenation/tests/malayalam.txt
index 6c5804c..8a59416 100644
--- a/hyphenation/tests/malayalam.txt
+++ b/hyphenation/tests/malayalam.txt
@@ -1,7 +1,687 @@
- അമേരിക്കന്‍ ഐക്യനാടുകളിലെ നഗരങ്ങളില്‍ വച്ച് വലിപ്പത്തില്‍ നാലാം സ്ഥാനത്തുള്ളതും ടെക്സസ് സംസ്ഥാനത്തിലെ ഏറ്റവും വലിയ നഗരവുമാണ്‌ ഹ്യൂസ്റ്റണ്‍ (ഉച്ചാരണം /ˈhjuːstən/). 2006ലെ കണക്കെടുപ്പുപ്രകാരം ഈ നഗരത്തില്‍ 600 ചതുരശ്രമൈല്‍ (1,600 കി.മീ²). പ്രദേശത്ത് 2.14 ദശലക്ഷം ആളുകള്‍ വസിക്കുന്നു. ഹാരിസ് കൗണ്ടിയുടെ ആസ്ഥാനവും 5.6 ദശലക്ഷം ജനങ്ങള്‍ വസിക്കുന്നതും അമേരിക്കയിലെ ഏറ്റവും വലിയ ആറാമത്തെ മഹാനഗര (മെട്രോപ്പോളിറ്റന്‍) പ്രദേശവുമായ ഹ്യൂസ്റ്റണ്‍–ഷുഗര്‍ലാന്‍ഡ്–ബേടൗണ്‍ മെട്രോപ്പോളീറ്റന്‍ പ്രദേശത്തിന്റെ സാമ്പത്തിക കേന്ദ്രവുമാണ്‌ ഹ്യൂസ്റ്റണ്‍.
-1836 ഓഗസ്റ്റ് 30ന്‌ സഹോദരന്മാരായ അഗസ്റ്റസ് ചാപ്പ്മാന്‍ അല്ലെനും ജോണ്‍ കിര്‍ബി അല്ലെനും [5] ബഫല്ലോ ബയൂവിന്റെ തീരപ്രദേശങ്ങളില്‍ ഹ്യൂസ്റ്റന്‍ സ്ഥാപിച്ചു. 1837 ജൂണ്‍ 5ന്‌ ഇതൊരു നഗരമായി ഇന്‍കോര്‍പ്പറേറ്റ് ചെയ്തു. ഈ അവസരത്തില്‍ അന്നത്തെ ടെക്സസ് റിപ്പബ്ലിക്കിന്റെ പ്രസിഡന്റും നഗരത്തിനു 25 മൈല്‍ (40 കി.മീ) കിഴക്കായി നടന്ന ജസീന്തോ യുദ്ധം നയിച്ച മുന്‍ ജനറലുമായ സാം ഹ്യൂസ്റ്റന്റെ നാമം നഗരത്തിനു നല്‍കുകയായിരുന്നു. അടിക്കടി വികാസം പ്രാപിച്ചുകൊണ്ടിരുന്ന തുറമുഖ, റെയില്‍ വ്യവസായവും 1901-ലെ എണ്ണ നിക്ഷേപങ്ങള്‍ കണ്ടെത്തിയതും നഗരത്തില്‍ ജനസംഖ്യാപ്രവാഹത്തിനു വഴിതെളിച്ചു. ഇരുപതാം നൂറ്റാണ്ടിന്റെ മദ്ധ്യത്തോടുകൂടി ലോകത്തെ ഏറ്റവും വലിയ രണ്ടാമത്തെ ആരോഗ്യസം‌രക്ഷണ-ഗവേഷണ സ്ഥാപനങ്ങളുടെ കൂട്ടായ്മയായ ടെക്സസ് മെഡിക്കല്‍ സെന്റര്‍, നാസയുടെ മിഷന്‍ കണ്ട്രോള്‍ സെന്റര്‍ ജോണ്‍സണ്‍ സ്പേസ് സെന്റര്‍ എന്നിവ ഹ്യൂസ്റ്റണില്‍ സ്ഥാപിക്കപ്പെട്ടു.
-ഹ്യൂസ്റ്റന്റെ സമ്പദ്‌വ്യവസ്ഥ, ഊര്‍ജ്ജ, നിര്‍മ്മാണ, വ്യോമനിര്‍മ്മാണ, സാങ്കേതികത തുടങ്ങിയ മേഖലകളിലുള്ള വിവിധതരം വ്യവസായങ്ങളില്‍ അധിഷ്ഠിതമാണ്‌; ഹ്യൂസണിലുള്ളതിനേക്കാള്‍ ഫോര്‍ച്ച്യൂണ്‍ 500 കമ്പനികള്‍ ന്യൂയോര്‍ക്കില്‍ മാത്രമാണുള്ളത്. വാണിജ്യപരമായി, ഹ്യൂസ്റ്റണ്‍, ഗാമാ വേള്‍ഡ് സിറ്റി എന്ന പേരില്‍ അറിയപ്പെടുന്നു. എണ്ണപ്പാടത്ത് ഉപയോഗിക്കുന്ന ഉപകരണങ്ങള്‍ നിര്‍മിക്കുന്നതിനുള്ള ഒരു പ്രധാന കേന്ദ്രവുമാണ്‌ ഇത്. അമേരിക്കന്‍ ഐക്യനാടുകളിലെ തുറമുഖളില്‍ വച്ച്, കൈകാര്യം ചെയ്യുന്ന ചരക്കിന്റെ ഭാരത്തിന്റെ മൊത്തക്കണക്കെടുത്താല്‍ ഏറ്റവുമധികം ടണ്‍ കൈകാര്യം ചെയ്യുന്ന രണ്ടാമത്തെ തുറമുഖവും, ജലമാര്‍ഗ്ഗമുള്ള അന്താരാഷ്ട്രകാര്‍ഗോ ഏറ്റവുമധികം കൈകാര്യം ചെയ്യുന്ന തുറമുഖവുമാണ്‌ ഹ്യൂസ്റ്റണ്‍ തുറമുഖം.[6] അനേകം സംസ്കാരങ്ങളില്‍നിന്നുള്ള ജനങ്ങളുള്ള ഈ നഗരം അനുദിനം വളരുന്ന ഒരു അന്താരാഷ്ട്ര സമൂഹത്തിനും വേദിയാണ്‌. ഇവിടെയുള്ള സാംസ്കാരിക സ്ഥാപനങ്ങള്‍ വര്‍ഷംതോറും 7 ദശലക്ഷം സന്ദര്‍ശകരെ ഹ്യൂസ്റ്റണ്‍ മ്യൂസിക് ഡിസ്ട്രിക്റ്റിലേക്ക് ആകര്‍ഷിക്കുന്നു. ദൃശ്യകലകള്‍ക്കും പ്രകടനകലകള്‍ക്കുമുള്ള ഒരു സജീവവേദി മ്യൂസിക് ഡിസ്ട്രിക്റ്റിലുണ്ട്. വര്‍ഷം മുഴുവന്‍ പ്രധാന പ്രകടനകലകളിലെല്ലാം പ്രദര്‍ശനം നടത്തുന്ന ചുരുക്കം ചില അമേരിക്കന്‍ നഗരങ്ങളിലൊന്നാണ്‌ ഹ്യൂസ്റ്റണ്‍.[7]