silpa/modules/transliterator/transliterate.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Transliterator
# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
# URL: http://www.smc.org.in

from common import *
class Transliterator(SilpaModule):
	"""Transliterate text between Indic scripts by shifting code points.

	The Indic Unicode blocks share a common (ISCII-derived) layout, so a
	character is moved from one script to another by adding the distance
	between the two blocks to its code point.
	"""

	# Reference code point of each language's Unicode block. Every Indic
	# entry is the block start + 1 (e.g. Devanagari starts at U+0900).
	# 'ka_IN' is the code this module has always used for Kannada; 'kn_IN'
	# (the ISO 639-1 code for Kannada) is accepted as an alias.
	_LANG_BASES = {
		'en_US': 0,
		'hi_IN': 0x0901,
		'bn_IN': 0x0981,
		'pa_IN': 0x0A01,
		'gu_IN': 0x0A81,
		'or_IN': 0x0B01,
		'ta_IN': 0x0B81,
		'te_IN': 0x0C01,
		'ka_IN': 0x0C81,
		'kn_IN': 0x0C81,
		'ml_IN': 0x0D01,
	}

	# Each Indic script occupies one 128-code-point block.
	_SCRIPT_BLOCK_SIZE = 0x80

	def transliterate(self,text, target_lang_code):
		"""Return `text` with every word rewritten in the target script.

		The text is split on runs of whitespace; the language of each word
		is detected separately and whitespace is copied to the output
		unchanged, so spacing and line breaks of the input are preserved.

		Only characters that lie inside the detected source script's
		Unicode block are shifted. Everything else (punctuation, digits,
		ASCII, line breaks) passes through untouched instead of being
		shifted into unrelated code points or dropped.

		text -- the (unicode) text to transliterate
		target_lang_code -- key of the target language, e.g. "ta_IN"
		"""
		import re  # local import: the module header only imports `common`

		try:
			to_char = unichr  # Python 2
		except NameError:
			to_char = chr  # Python 3

		mm = ModuleManager()
		ld = mm.getModuleInstance("Detect Language")
		# The capturing group keeps the whitespace separators in the token
		# list, so they can be re-emitted exactly as they were typed.
		splitter = re.compile(r'(\s+)', re.UNICODE)
		pieces = []
		for token in splitter.split(text):
			if not token.strip():
				# Whitespace separator (or empty edge token): copy verbatim.
				pieces.append(token)
				continue
			# The detector's result is indexed by the word itself.
			src_lang_code = ld.detect_lang(token)[token]
			pieces.append(
				self._shift_word(token, src_lang_code, target_lang_code, to_char))
		return "".join(pieces)

	def _shift_word(self, word, src_lang_code, target_lang_code, to_char):
		"""Move the source-script characters of `word` into the target script."""
		src_base = self._LANG_BASES.get(src_lang_code, 0)
		target_base = self._LANG_BASES.get(target_lang_code, 0)
		# A base of 0 means English or an unknown language. There is no
		# block-aligned mapping for those, so the word is left as it is.
		if not src_base or not target_base or src_base == target_base:
			return word
		# Loop-invariant: computed once per word, not once per character.
		offset = self.getOffset(src_lang_code, target_lang_code)
		block_start = src_base - 1
		block_end = block_start + self._SCRIPT_BLOCK_SIZE
		shifted = []
		for char in word:
			code_point = ord(char)
			if block_start <= code_point < block_end:
				shifted.append(to_char(code_point + offset))
			else:
				shifted.append(char)
		return "".join(shifted)

	def getOffset(self,src,target):
		"""Return the code point distance from language `src` to `target`.

		Returns 0 when either language code is not in the table, which
		leaves text unchanged.
		"""
		try:
			return self._LANG_BASES[target] - self._LANG_BASES[src]
		except (KeyError, TypeError):
			# Unknown (or unhashable) language code: no shift.
			return 0

	def _as_text(self, value):
		"""Decode UTF-8 byte strings; return anything else unchanged."""
		if isinstance(value, bytes):
			return value.decode('utf-8')
		return value

	def process(self, form):
		"""Render the transliteration page for a submitted (or empty) form.

		form -- mapping of field names to objects with a `.value`
		        attribute; reads 'input_text' and 'trans-lang'.

		Returns the HTML fragment: the input form and, when 'input_text'
		was submitted, the transliterated text below it. User supplied
		text is HTML-escaped before it is placed in the page.
		"""
		# Local import: the module header only imports `common`.
		from xml.sax.saxutils import escape

		response = """
		<h2>Transliterator</h2><hr/>
		<p>Enter the text for transliteration in the below text area.
		 Language of each  word will be detected. 
		 You can give the text in any language and even with mixed language
		</p>
		<form action="" method="post">
		<textarea cols='100' rows='25' name='input_text' id='id1'>%s</textarea><br/> 
		<select id="trans-lang" name="trans-lang" style="width:12em;">
		  <option value="hi_IN">Hindi</option>
		  <option value="ml_IN">Malayalam</option>
		  <option value="bn_IN">Bengali</option>
		  <option value="ta_IN">Tamil</option>
		  <option value="te_IN">Telugu</option>
		  <option value="or_IN">Oriya</option>
		  <option value="gu_IN">Gujarati</option>
		  <option value="pa_IN">Panjabi</option>
		  <option value="ka_IN">Kannada</option>
		</select>
		<input  type="submit" id="Transliterate" value="Transliterate"  name="action" style="width:12em;"/>
		<input type="reset" value="Clear" style="width:12em;"/>
		<br/>
		</form>
		"""
		if 'input_text' not in form:
			return response % ""

		text = self._as_text(form['input_text'].value)
		target_lang = self._as_text(form['trans-lang'].value)
		# Escape before interpolating: the text comes straight from the
		# request and would otherwise be able to inject markup.
		response = response % escape(text)
		response = response + "<h2>Transliterated Text</h2><hr/>"
		result = escape(self.transliterate(text, target_lang))
		# Escaping happens first so the <br/> tags added here survive.
		result = result.replace('\n', '<br/>')
		return response + result

	def get_module_name(self):
		"""Return the display name of this module."""
		return "Transliterator"

	def get_info(self):
		"""Return the one-line description of this module."""
		return 	"Transliterated the text between any Indian Language"	
		
		
def getInstance():
	"""Return a newly constructed Transliterator."""
	module = Transliterator()
	return module
		
						
if __name__ == "__main__":
	# Manual smoke test: transliterate a Malayalam sample into Tamil.
	# print is called with a single parenthesised argument so the line is
	# valid, and prints the same thing, on both Python 2 and Python 3.
	transliterator = Transliterator()
	sample = u"കരയുന്നോ  കരയുന്നോ?"
	print(transliterator.transliterate(sample, "ta_IN"))