summary refs log tree commit diff stats
path: root/silpa/modules/transliterator
diff options
context:
space:
mode:
author Santhosh Thottingal <santhosh.thottingal@gmail.com> 2009-03-24 21:53:02 +0530
committer Santhosh Thottingal <santhosh.thottingal@gmail.com> 2009-03-24 21:53:02 +0530
commit 925ba08ecb3ff12496d343a1a0a99daf9a32ad3d (patch)
tree 91aff87b0818e4e861a0ad2c6843d2286cb3775b /silpa/modules/transliterator
parent 886b09e5d997af17d1b0a9c7fad6e952a94bed45 (diff)
download Rachana.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.gz
Rachana.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.xz
Rachana.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.zip
Adding Silpa framework
Diffstat (limited to 'silpa/modules/transliterator')
-rw-r--r-- silpa/modules/transliterator/transliterate.py 93
1 files changed, 93 insertions, 0 deletions
diff --git a/silpa/modules/transliterator/transliterate.py b/silpa/modules/transliterator/transliterate.py
new file mode 100644
index 0000000..6603b27
--- /dev/null
+++ b/silpa/modules/transliterator/transliterate.py
@@ -0,0 +1,93 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Transliterator (Silpa framework module)
+# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+from langdetect import LangDetect
+class Transliterator:
+    """Transliterate text between Indic scripts by shifting code points.
+
+    A character is moved to the target script by adding the distance
+    between the two script bases listed in ``SCRIPT_BASES``.
+    """
+
+    # Base code point of each supported script, keyed by language code.
+    SCRIPT_BASES = {
+        "hi_IN": 0x0901,
+        "bn_IN": 0x0981,
+        "pa_IN": 0x0A01,
+        "gu_IN": 0x0A81,
+        "or_IN": 0x0B01,
+        "ta_IN": 0x0B81,
+        "te_IN": 0x0C01,
+        "ka_IN": 0x0C81,
+        "ml_IN": 0x0D01,
+    }
+    # Each base sits one past the start of a 128-code-point Unicode block.
+    BLOCK_SIZE = 0x80
+
+    def transliterate(self, text, target_lang_code):
+        """Return ``text`` with each word rewritten in the target script.
+
+        The language of every space-separated word is detected on its own,
+        so mixed-language input is handled word by word.  Only characters
+        inside the source script's block are shifted; punctuation, digits
+        and other characters are copied unchanged instead of being shifted
+        or dropped.  Words whose language is unsupported are kept as-is.
+
+        text -- the string to transliterate (expected to be unicode).
+        target_lang_code -- a key of ``SCRIPT_BASES``, e.g. "ta_IN".
+        """
+        try:
+            to_char = unichr  # Python 2
+        except NameError:
+            to_char = chr  # Python 3 has no unichr
+        detector = LangDetect()
+        converted = []
+        for word in text.split(" "):
+            # presumably detect_lang returns codes such as "ml_IN" - confirm
+            src_lang_code = detector.detect_lang(word)
+            shift = self.getOffset(src_lang_code, target_lang_code)
+            if shift == 0:
+                converted.append(word)
+                continue
+            block_start = self.SCRIPT_BASES[src_lang_code] - 1
+            block_end = block_start + self.BLOCK_SIZE
+            converted.append("".join(
+                to_char(ord(ch) + shift)
+                if block_start <= ord(ch) < block_end else ch
+                for ch in word))
+        # Joining (rather than appending " " per word) avoids a trailing space.
+        return " ".join(converted)
+
+    def getOffset(self, src, target):
+        """Return the code point distance from script ``src`` to ``target``.
+
+        Returns 0 (no shift) when either language code is not in
+        ``SCRIPT_BASES``, which covers "en_US" and "Unknown" sources,
+        so unsupported text is never moved by a bogus distance.
+        """
+        bases = self.SCRIPT_BASES
+        if src not in bases or target not in bases:
+            return 0
+        return bases[target] - bases[src]
+if __name__ == "__main__":
+    demo = Transliterator()  # manual check: Malayalam sample, "ta_IN" target
+    print(demo.transliterate(u"കരയുന്നോ കരയുന്നോ?", "ta_IN"))