diff options
Diffstat (limited to 'silpa/modules/transliterator/transliterate.py')
-rw-r--r-- | silpa/modules/transliterator/transliterate.py | 51 |
1 files changed, 48 insertions, 3 deletions
def transliterate_ml_en(self, word):
    """Romanize one Malayalam word into Latin (English) letters.

    The word is scanned left to right. Each consonant is replaced by its
    Latin spelling and followed by the inherent vowel 'a' unless the next
    character is a virama, a dependent vowel sign or an independent vowel.
    Vowels, vowel signs, anusvara and visarga are replaced by their Latin
    spelling. A virama only suppresses the inherent vowel and produces no
    output of its own. Characters that are not in the tables (Latin
    text, digits, punctuation, chillu letters, ...) are copied through
    unchanged.

    NOTE(review): a consonant that is the last character of the word gets
    no inherent 'a' (u'കമല' -> 'kamal'). This is kept from the previous
    implementation; confirm whether it is intended for Malayalam.

    TODO: how to make this more generic so that more languages can be
    handled here?
      idea 1: transliterate any language to a common language, say Hindi,
              and then do the conversion (the existing offset-based
              transliteration could be reused).
      idea 2: have one dictionary per language (english_xx_dict).

    :param word: the word to romanize; expected to be a unicode string so
                 that indexing yields whole characters.
    :returns: the romanized word.
    """
    # MALAYALAM SIGN VIRAMA (chandrakkala): removes the inherent vowel.
    virama = u"\u0d4d"

    # Independent vowels, romanized the same way as the matching dependent
    # vowel signs below so both spellings of a vowel give the same output.
    independent_vowels = {
        u'അ': 'a', u'ആ': 'a', u'ഇ': 'i', u'ഈ': 'ee', u'ഉ': 'u',
        u'ഊ': 'uu', u'ഋ': 'ri', u'എ': 'e', u'ഏ': 'e', u'ഐ': 'ai',
        u'ഒ': 'o', u'ഓ': 'oo', u'ഔ': 'au',
    }
    # Dependent vowel signs (matras); they replace the inherent vowel.
    vowel_signs = {
        u'ാ': 'a', u'ി': 'i', u'ീ': 'ee', u'ു': 'u',
        u'ൂ': 'uu', u'ൃ': 'ri', u'െ': 'e', u'േ': 'e',
        u'ൈ': 'ai', u'ൊ': 'o', u'ോ': 'oo', u'ൗ': 'au',
    }
    # Anusvara and visarga: they close a syllable and carry no vowel.
    marks = {u'ം': 'm', u'ഃ': 'h'}

    # The doubled nga is written with three code points (nga, virama, nga)
    # but is romanized as a single 'ng', so it is matched as one unit.
    nga_cluster = u'ങ' + virama + u'ങ'
    consonants = {
        u'ക': 'k', u'ഖ': 'kh', u'ഗ': 'g', u'ഘ': 'gh',
        u'ങ': 'ng', nga_cluster: 'ng',
        u'ച': 'ch', u'ഛ': 'chh', u'ജ': 'j', u'ഝ': 'jhh', u'ഞ': 'nj',
        u'ട': 't', u'ഠ': 'th', u'ഡ': 'd', u'ഢ': 'dh', u'ണ': 'n',
        u'ത': 'th', u'ഥ': 'th', u'ദ': 'd', u'ധ': 'dh', u'ന': 'n',
        u'പ': 'p', u'ഫ': 'ph', u'ബ': 'b', u'ഭ': 'bh', u'മ': 'm',
        u'യ': 'y', u'ര': 'r', u'ല': 'l', u'വ': 'v', u'റ': 'r',
        u'ശ': 'sa', u'ഷ': 'sh', u'സ': 's', u'ഹ': 'h', u'ള': 'l',
        u'ഴ': 'zh',
    }

    # Everything that is romanized without an inherent vowel.
    non_consonants = dict(independent_vowels)
    non_consonants.update(vowel_signs)
    non_consonants.update(marks)

    # A consonant directly followed by one of these gets no inherent 'a'.
    blocks_inherent_vowel = set(independent_vowels)
    blocks_inherent_vowel.update(vowel_signs)
    blocks_inherent_vowel.add(virama)

    pieces = []
    word_length = len(word)
    index = 0
    while index < word_length:
        if word.startswith(nga_cluster, index):
            token = nga_cluster
        else:
            token = word[index]
        # From here on, index points at the character after the token.
        index += len(token)

        if token == virama:
            # Its only effect (no inherent vowel) was applied when the
            # preceding consonant was processed.
            continue

        if token in consonants:
            pieces.append(consonants[token])
            if index < word_length and word[index] not in blocks_inherent_vowel:
                pieces.append('a')
        else:
            # Known vowel/sign/mark, or an unknown character copied as is.
            pieces.append(non_consonants.get(token, token))
    return "".join(pieces)