summary | refs | log | tree | commit | diff | stats
path: root/silpa/modules/transliterator/transliterate.py
diff options
context:
space:
mode:
Diffstat (limited to 'silpa/modules/transliterator/transliterate.py')
-rw-r--r-- silpa/modules/transliterator/transliterate.py 51
1 files changed, 48 insertions, 3 deletions
diff --git a/silpa/modules/transliterator/transliterate.py b/silpa/modules/transliterator/transliterate.py
index 206a6bf..1b534cf 100644
--- a/silpa/modules/transliterator/transliterate.py
+++ b/silpa/modules/transliterator/transliterate.py
@@ -1,6 +1,6 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
-# Paralperu
+# Any Indian Language to any other Indian language transliterator
# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
@@ -23,6 +23,48 @@
from common import *
class Transliterator(SilpaModule):
+    def transliterate_ml_en(self, word):
+        """Transliterate a single Malayalam word into a rough Latin spelling.
+
+        The word is scanned one code point at a time:
+
+        * a consonant emits its Latin form followed by the inherent vowel
+          'a', unless the next code point is a virama or a dependent vowel
+          sign (both replace the inherent vowel);
+        * the virama itself emits nothing;
+        * independent vowels, vowel signs, anusvara, visarga and chillu
+          letters emit their Latin form with nothing appended;
+        * any other character (Latin text, digits, punctuation, ...) is
+          copied through unchanged.
+
+        word -- the text to transliterate (a unicode string).
+        Returns the transliterated string; an empty word gives "".
+        """
+        virama = u"്"
+        # TODO: how can this be made more generic so that more languages can
+        # be handled here?
+        # idea1: transliterate any language to a common language, say Hindi,
+        #        and then do the conversion? The existing offset-based
+        #        transliteration could be reused for that.
+        # idea2: have one dictionary per language, like english_xx_dict?
+
+        # Independent vowels use the same spelling as the matching vowel sign.
+        vowels = {
+            u'അ': 'a', u'ആ': 'a', u'ഇ': 'i', u'ഈ': 'ee', u'ഉ': 'u',
+            u'ഊ': 'uu', u'ഋ': 'ri', u'എ': 'e', u'ഏ': 'e', u'ഐ': 'ai',
+            u'ഒ': 'o', u'ഓ': 'oo', u'ഔ': 'au',
+        }
+        consonants = {
+            u'ക': 'k', u'ഖ': 'kh', u'ഗ': 'g', u'ഘ': 'gh', u'ങ': 'ng',
+            u'ച': 'ch', u'ഛ': 'chh', u'ജ': 'j', u'ഝ': 'jh', u'ഞ': 'nj',
+            u'ട': 't', u'ഠ': 'th', u'ഡ': 'd', u'ഢ': 'dh', u'ണ': 'n',
+            u'ത': 'th', u'ഥ': 'th', u'ദ': 'd', u'ധ': 'dh', u'ന': 'n',
+            u'പ': 'p', u'ഫ': 'ph', u'ബ': 'b', u'ഭ': 'bh', u'മ': 'm',
+            u'യ': 'y', u'ര': 'r', u'ല': 'l', u'വ': 'v', u'റ': 'r',
+            u'ശ': 'sh', u'ഷ': 'sh', u'സ': 's', u'ഹ': 'h', u'ള': 'l',
+            u'ഴ': 'zh',
+        }
+        vowel_signs = {
+            u'ാ': 'a', u'ി': 'i', u'ീ': 'ee', u'ു': 'u',
+            u'ൂ': 'uu', u'ൃ': 'ri', u'െ': 'e', u'േ': 'e',
+            u'ൈ': 'ai', u'ൊ': 'o', u'ോ': 'oo', u'ൗ': 'au',
+        }
+        # Anusvara, visarga and the atomic chillu letters: they carry no
+        # inherent vowel and do not cancel the one of a preceding consonant.
+        finals = {
+            u'ം': 'm', u'ഃ': 'h',
+            u'ൺ': 'n', u'ൻ': 'n', u'ർ': 'r', u'ൽ': 'l', u'ൾ': 'l',
+        }
+        # Everything that is emitted as-is, without an inherent vowel.
+        plain = {}
+        for table in (vowels, vowel_signs, finals):
+            plain.update(table)
+
+        pieces = []
+        last_index = len(word) - 1
+        for index, letter in enumerate(word):
+            if letter == virama:
+                # Its only effect (cancelling the inherent vowel) is applied
+                # when the preceding consonant is handled; it has no Latin
+                # spelling of its own.
+                continue
+            if letter in consonants:
+                pieces.append(consonants[letter])
+                if index < last_index:
+                    following = word[index + 1]
+                else:
+                    following = None
+                # A consonant keeps its inherent 'a' unless a virama or a
+                # vowel sign follows; this includes the word-final position.
+                if following != virama and following not in vowel_signs:
+                    pieces.append('a')
+            else:
+                # Unknown characters are passed through untouched.
+                pieces.append(plain.get(letter, letter))
+        return "".join(pieces)
+
def transliterate(self,text, target_lang_code):
mm=ModuleManager()
ld = mm.getModuleInstance("Detect Language")
@@ -31,7 +73,9 @@ class Transliterator(SilpaModule):
for word in words:
if(word.strip()>""):
src_lang_code=ld.detect_lang(word)[word]
- tx_str = tx_str
+ if((target_lang_code=="en_US") and (src_lang_code=="ml_IN")):
+ tx_str=tx_str + self.transliterate_ml_en(word)
+ continue
for chr in word:
offset=ord(chr) + self.getOffset(src_lang_code, target_lang_code)
if(offset>0):
@@ -60,7 +104,7 @@ class Transliterator(SilpaModule):
You can give the text in any language and even with mixed language
</p>
<form action="" method="post">
- <textarea cols='100' rows='25' name='input_text' id='id1'>%s</textarea></br>
+ <textarea name='input_text' id='id1'>%s</textarea></br>
<select id="trans-lang" name="trans-lang" style="width:12em;">
<option value="hi_IN">Hindi</option>
<option value="ml_IN">Malayalam</option>
@@ -71,6 +115,7 @@ class Transliterator(SilpaModule):
<option value="gu_IN">Gujarai</option>
<option value="pa_IN">Panjabi</option>
<option value="ka_IN">Kannada</option>
+ <option value="en_US">English</option>
</select>
<input type="submit" id="Transliterate" value="Transliterate" name="action" style="width:12em;"/>
<input type="reset" value="Clear" style="width:12em;"/>