summary | refs | log | tree | commit | diff | stats
path: root/silpa/modules/syllabalizer/syllabalizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'silpa/modules/syllabalizer/syllabalizer.py')
-rwxr-xr-x [-rw-r--r--] silpa/modules/syllabalizer/syllabalizer.py 58
1 files changed, 57 insertions, 1 deletions
diff --git a/silpa/modules/syllabalizer/syllabalizer.py b/silpa/modules/syllabalizer/syllabalizer.py
index 706ee77..39c140f 100644..100755
--- a/silpa/modules/syllabalizer/syllabalizer.py
+++ b/silpa/modules/syllabalizer/syllabalizer.py
@@ -51,6 +51,54 @@ class Syllabalizer(SilpaModule):
lst_chars.append(char)
return lst_chars
def syllabalize_kn(self,text):
    """Split Kannada *text* into a list of syllable clusters.

    The characters of *text* are visited in order and grouped as follows:

    * a punctuation limiter always becomes an item of its own;
    * a dependent sign (the code points listed in ``signs``) is appended
      to the previous item;
    * any other character is appended to the previous item when that
      item ends with the halant (U+0CCD), otherwise it starts a new item.

    A character that has no previous item to attach to (for example a
    dependent sign at the very start of *text*) starts a new item instead
    of raising ``IndexError``.

    Returns the list of clusters; an empty *text* yields ``[]``.
    """
    signs = frozenset([
        u'\u0c82', u'\u0c83', u'\u0cbd', u'\u0cbe', u'\u0cbf', u'\u0cc0',
        u'\u0cc1', u'\u0cc2', u'\u0cc3', u'\u0cc4', u'\u0cc6', u'\u0cc7',
        u'\u0cc8', u'\u0cca', u'\u0ccb', u'\u0ccc', u'\u0ccd'])
    limiters = frozenset(['.', '\"', '\'', '`', '!', ';', ',', '?'])
    halant = u'\u0ccd'

    lst_chars = []
    for char in text:
        if char in limiters:
            lst_chars.append(char)
        elif not lst_chars:
            # Nothing to attach to yet: even a dependent sign has to
            # stand alone here rather than index into an empty list.
            lst_chars.append(char)
        elif char in signs or lst_chars[-1][-1] == halant:
            # Signs modify the preceding cluster; a trailing halant means
            # the cluster is still open and absorbs the next character
            # (whatever it is, including whitespace).
            lst_chars[-1] += char
        else:
            lst_chars.append(char)

    return lst_chars
def syllabalize_bn(self,text):
    """Split Bengali *text* into a list of syllable clusters.

    The characters of *text* are visited in order and grouped as follows:

    * a punctuation limiter always becomes an item of its own;
    * a dependent sign (the code points listed in ``signs``) is appended
      to the previous item;
    * any other character is appended to the previous item when that
      item ends with the halant (U+09CD), otherwise it starts a new item.

    A character that has no previous item to attach to (for example a
    dependent sign at the very start of *text*) starts a new item instead
    of raising ``IndexError``.

    Returns the list of clusters; an empty *text* yields ``[]``.
    """
    signs = frozenset([
        u'\u0981', u'\u0982', u'\u0983', u'\u09bd', u'\u09be', u'\u09bf',
        u'\u09c0', u'\u09c1', u'\u09c2', u'\u09c3', u'\u09c4', u'\u09c6',
        u'\u09c7', u'\u09c8', u'\u09ca', u'\u09cb', u'\u09cc', u'\u09cd',
        u'\u09d7'])
    limiters = frozenset(['.', '\"', '\'', '`', '!', ';', ',', '?'])
    halant = u'\u09cd'

    lst_chars = []
    for char in text:
        if char in limiters:
            lst_chars.append(char)
        elif not lst_chars:
            # Nothing to attach to yet: even a dependent sign has to
            # stand alone here rather than index into an empty list.
            lst_chars.append(char)
        elif char in signs or lst_chars[-1][-1] == halant:
            # Signs modify the preceding cluster; a trailing halant means
            # the cluster is still open and absorbs the next character
            # (whatever it is, including whitespace).
            lst_chars[-1] += char
        else:
            lst_chars.append(char)

    return lst_chars
def syllabalize_hi(self,text):
signs = [
u'\u0902', u'\u0903', u'\u093e', u'\u093f', u'\u0940', u'\u0941',
@@ -176,11 +224,19 @@ class Syllabalizer(SilpaModule):
def syllabalize(self,text):
mm=ModuleManager()
ld = mm.getModuleInstance("Detect Language")
- lang=ld.detect_lang(text)[text]
+ lang = None
+ try:
+ lang=ld.detect_lang(text)[text]
+ except:
+ pass #FIXME
if(lang=="ml_IN"):
return self.syllabalize_ml(text)
if(lang=="hi_IN"):
return self.syllabalize_hi(text)
+ if(lang=="kn_IN"):
+ return self.syllabalize_kn(text)
+ if(lang=="bn_IN"):
+ return self.syllabalize_bn(text)
if(lang=="en_US"):
return self.syllabalize_en(text)
lst_chars=[]