Adding Silpa framework

author: Santhosh Thottingal <santhosh.thottingal@gmail.com> 2009-03-24 21:53:02 +0530
committer: Santhosh Thottingal <santhosh.thottingal@gmail.com> 2009-03-24 21:53:02 +0530
commit: 925ba08ecb3ff12496d343a1a0a99daf9a32ad3d (patch)
tree: 91aff87b0818e4e861a0ad2c6843d2286cb3775b /silpa/modules
parent: 886b09e5d997af17d1b0a9c7fad6e952a94bed45 (diff)
download: AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.gz
AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.xz
AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.zip
15 files changed, 1865 insertions, 0 deletions
diff --git a/silpa/modules/__init__.py b/silpa/modules/__init__.py
new file mode 100644
index 0000000..a436dc9
--- /dev/null
+++ b/silpa/modules/__init__.py
@@ -0,0 +1,3 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+from  lemmatizer import *
diff --git a/silpa/modules/fortune/fortune.py b/silpa/modules/fortune/fortune.py
new file mode 100644
index 0000000..c6340c3
--- /dev/null
+++ b/silpa/modules/fortune/fortune.py
@@ -0,0 +1,16 @@
+#  Fortune: wrapper around the /usr/games/fortune command
+#  coding: utf-8
+#
+#  Copyright © 2008  Santhosh Thottingal
+#  Released under the GPLV3+ license
+
+import commands
+class Fortune:
+	def fortune(self, word):
+		if(word):
+			command = "/usr/games/fortune -m" + word
+			return commands.getoutput(command)
+		else:
+			command = "/usr/games/fortune"
+			return commands.getoutput(command)
+
diff --git a/silpa/modules/hyphenator/hyphenator.py b/silpa/modules/hyphenator/hyphenator.py
new file mode 100644
index 0000000..9efe8de
--- /dev/null
+++ b/silpa/modules/hyphenator/hyphenator.py
@@ -0,0 +1,35 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Hyphenator (placeholder; Normalize currently returns the word unchanged)
+# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>, 
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+
+ 
+import string
+ 
+class Hyphenator:
+
+	def __init__(self):
+		self.langauge =""
+		
+	def Normalize(self, word):
+		
+		return word
+		
diff --git a/silpa/modules/lemmatizer/__init__.py b/silpa/modules/lemmatizer/__init__.py
new file mode 100644
index 0000000..e1f7e6e
--- /dev/null
+++ b/silpa/modules/lemmatizer/__init__.py
@@ -0,0 +1,2 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
diff --git a/silpa/modules/lemmatizer/lemmatizer.py b/silpa/modules/lemmatizer/lemmatizer.py
new file mode 100644
index 0000000..8b9a35c
--- /dev/null
+++ b/silpa/modules/lemmatizer/lemmatizer.py
@@ -0,0 +1,132 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys  
+import codecs  
+import os  
+import string
+import curses.ascii 
+class Lemmatizer:
+
+	def __init__(self):
+		self.input_filename =""
+		self.output_filename = ""
+		self.rules_file = ""
+		self.rulesDict = dict()
+		
+	def Lemmatize(self):
+		result = ""
+		self.rulesDict = self.LoadRules()
+		if self.input_filename :
+			uni_file = codecs.open(self.input_filename, encoding = 'utf-8', errors = 'ignore')
+		else :
+			uni_file = codecs.open(sys.stdin, encoding = 'utf-8', errors = 'ignore')			
+		text = ""
+		if self.output_filename :
+			output_file = codecs.open(self.output_filename, encoding = 'utf-8', errors = 'ignore',  mode='w+')			
+		line_number = 0
+		while 1:
+   			text = uni_file.readline()
+   			line_number = line_number + 1
+			if text == "":
+				break
+			words = text.split(" ")
+			word_count = len(words)
+			word_iter = 0
+			word = ""
+			while word_iter < word_count:
+				word = words[word_iter]
+				word_length = len(word)
+				print word_length
+				suffix_pos_itr = 2
+				while suffix_pos_itr   <  word_length : 
+					suffix = word[suffix_pos_itr:word_length]
+					if suffix in self.rulesDict:
+						word = word[0:suffix_pos_itr] +  self.rulesDict[suffix]
+						break
+					suffix_pos_itr = suffix_pos_itr + 1	
+				word_iter = word_iter + 1
+				print word	
+				result = result + word + ""
+			result="\n"	
+		return result
+	def Lemmatize(self, text):
+		result = ""
+		self.rulesDict = self.LoadRules()
+		words=text.split(" ")
+		word_count=len(words)
+		word_iter=0
+		word=""
+		while word_iter < word_count:
+			word = words[word_iter]
+			word = self.trim(word)
+			word_length = len(word)
+			suffix_pos_itr = 2
+			while suffix_pos_itr < word_length :
+				suffix = word[suffix_pos_itr:word_length]
+				if suffix in self.rulesDict:
+					word= word[0:suffix_pos_itr] +  self.rulesDict[suffix]
+					break;
+				suffix_pos_itr = suffix_pos_itr+1	
+			word_iter = word_iter+1
+			#print word	
+			result = result + word + " "
+		return result
+					
+	def LoadRules(self):	
+		print "Loading the rules..."
+		rules_dict = dict()
+		line = []
+		line_number = 0
+		rule_number = 0
+		rules_file = codecs. open(self.rules_file,encoding='utf-8', errors='ignore')
+		while 1:
+			line_number = line_number +1 
+   			text = unicode( rules_file.readline())
+			if text == "":
+			      break
+			if text[0] == '#': 
+			      continue  #this is a comment - ignore
+			text = text.split("#")[0]   #remove the comment part of the line     
+			line_number = line_number +1       
+			line = text.strip()  # remove unwanted space
+			if(line == ""):
+				  continue 
+			if(len(line.split("=")) != 2):
+					print "[Error] Syntax Error in the Rules. Line number: ",  line_number
+				  	print "Line: "+ text
+				  	continue 
+	 		lhs = line.split("=") [ 0 ]  .strip()
+	 		rhs = line.split("=") [ 1 ]  .strip()
+	 		if(len(rhs)>0):
+	 			if(lhs[0]=='"'):
+	 				lhs=lhs[1:len(lhs)] # if the string is "quoted"
+	 			if(lhs[len(lhs)-1]=='"'):
+	 				lhs=lhs[0:len(lhs)-1] # if the string is "quoted"
+	 		if(len(rhs)>0):
+	 			if(rhs[0]=='"'):
+	 				rhs=rhs[1:len(rhs)]  # if the string is "quoted"
+	 			if(rhs[len(rhs)-1]=='"'):
+	 				rhs=rhs[0:len(rhs)-1]	 # if the string is "quoted"			
+	 		rule_number=rule_number+1
+			rules_dict[lhs]=rhs
+			#print "[", rule_number ,"] " +lhs + " : " +rhs
+		print "Found ",rule_number, " rules."
+		return rules_dict
+	
+	def trim(self,word):
+		punctuations=['~','!','@','#','$','%','^','&','*','(',')','-','+','_','=','{','}','|' ,':',';','<','>','\,','.','?']
+		word=word.strip()
+		index=len(word)-1
+		while index>0:
+			if word[index] in punctuations:
+				word=word[0:index]
+			else:
+				break 
+			index=index-1	
+		return word
+if __name__ == "__main__":
+	lemmatizer= Lemmatizer()
+	lemmatizer.rules_file="/home/santhosh/www/malayalam.map"
+	lemmatizer.Lemmatize("മുദ്രാവാക്യവുമായി മുറ്റത്തില്‍")
+	
diff --git a/silpa/modules/lemmatizer/lemmatizer_ml.rules b/silpa/modules/lemmatizer/lemmatizer_ml.rules
new file mode 100644
index 0000000..5bfc40d
--- /dev/null
+++ b/silpa/modules/lemmatizer/lemmatizer_ml.rules
@@ -0,0 +1,193 @@
+#Malayalam Prathyaya Rules
+$min_root_length=1
+$iterations=1
+#അനുസ്വാരത്തിലവസാനിക്കുന്ന ക്രിയ/നാമം
+ത്തില്‍ = ം
+ത്ത്=ം
+ത്തു്=ം
+ത്തു=ം
+ത്തെ  = ം
+വുമായി = ം
+ത്തിനെ=ം
+ത്തിലെ=ം
+ത്തിലേയ്ക്ക് =ം
+ത്തിലേയ്ക്കു്= ം
+ത്തേക്ക്=ം
+ത്തിന്റെ=ം
+ത്തേയ്ക്കു്=ം
+ത്തിലായി= ം
+മാക്കി=ം
+മായും=ം
+മെങ്കില്‍=ം എങ്കില്‍
+വും=ം
+മാണു് = ം ആണു്
+മാണ്=ം ആണ്
+മല്ല = ം അല്ല 
+മില്ല = ം ഇല്ല
+മേ =ം
+മാകാം = ം ആകാം
+മായിരിക്കും=ം‌ ആയിരിക്കുക
+മുണ്ടായിരുന്നതായി=ം ഉണ്ടു് ആവുക
+മുണ്ടായി=ം ഉണ്ടു് ആവുക
+മുണ്ടായിരുന്ന=ം ഉണ്ടു് ആവുക
+മുണ്ടായിരുന്നു=ം ഉണ്ടു് ആവുക
+മുള്ള=ം ഉണ്ടാവുക
+മോ = ം
+മെ = ം
+ങ്ങളില്‍= ം
+ങ്ങള്‍ = ം
+ങ്ങളെ=ം
+ങ്ങള്‍ക്ക്=ം
+ങ്ങള്‍ക്കു്=ം
+ുകള്‍ = ു്
+#ക്രിയകള്‍
+യ്ക്കുന്ന =  യ്ക്കുക
+ക്കുന്ന = ക്കുക
+#ചില്ലിലവസാനിക്കുന്ന നാമങ്ങള്‍
+രില്‍ = ര്‍
+രാണു് = ര്‍ ആണു്
+രാണ് = ര്‍ ആണ്
+രല്ല = ര്‍ അല്ല
+രില്ല = ര്‍ ഇല്ല
+രുടെ = ര്‍
+രുടേ = ര്‍
+രു് = ര്‍
+രെ = ര്‍
+രോടു് = ര്‍
+രൊടു് = ര്‍
+രൊട് = ര്‍
+രോട് = ര്‍
+രാല്‍ = ര്‍
+രോ = ര്‍
+രും=ര്‍
+നില്‍ = ന്‍
+നാണു് = ന്‍ ആണു്
+നാണ്=ന്‍ ആണ്
+നല്ല = ന്‍ അല്ല 
+നില്ല= ന്‍ ഇല്ല
+നുടെ = ന്‍
+നുടേ= ന്‍
+നോടു് = ന്‍
+നോട് = ന്‍
+നായി = ന്‍ ആയി
+നോ =ന്‍
+നും=ന്‍
+ലാണു്= ല്‍ ആണു്
+ലാണ് = ല്‍ ആണ്
+ലല്ല= ല്‍ അല്ല 
+ലില്ല = ല്‍ ഇല്ല 
+ലില്‍ = ല്‍
+ലിന്റെ = ല്‍
+ലോട് = ല്‍
+ലോടു് = ല്‍
+ലായി=ല്‍ ആയി
+ലോ= ല്‍
+ലും=ല്‍
+ണാണു്= ണ്‍ ആണു്
+ണാണ് = ണ്‍ ആണ്
+ണിന്റെ = ണ്‍
+ണല്ല = ണ്‍ അല്ല 
+ണില്ല = ണ്‍ ഇല്ല 
+ണോട് =ണ്‍ 
+ണോടു = ണ്‍
+ണോടു്= ണ്‍
+ണോ= ണ്‍
+ണും=ണ്‍
+ളാണു്= ള്‍ ആണു്
+ളാണ്=ള്‍ ആണ്
+ളല്ല = ള്‍ അല്ല 
+ളില്ല= ള്‍ ഇല്ല 
+ളില്‍=ള്‍
+ളോട് = ള്‍
+ളോടു് =ള്‍
+ളോടു = ള്‍
+ളോ=ള്‍
+ളുടെ =ള്‍
+ളുടേ=ള്‍
+ള്‍ക്കായി=ള്‍
+ള്‍ക്കുള്ള =ള്‍ ഉള്ള
+ളും=ള്‍
+#അ/ഇകാരത്തിലവസാനിക്കുന്ന നാമങ്ങള്‍
+യില്‍= 
+യുടെ=
+യെ=
+യാണു്=
+യിലുള്ള=യില്‍ ഉള്ള
+യാണ്=
+യും=
+യ്ക്ക് = 
+യ്ക്കു് =
+യോ=
+യായ=
+യ്ക്കായി=
+ക്കായി=
+#ഉകാരത്തിലവസാനിക്കുന്ന നാമങ്ങള്‍
+വിനെ=
+വിന്റെ=
+വിന്=
+വിനു്=
+വെ=
+#്  എന്നതിലവസാനിക്കുന്ന നാമങ്ങള്‍
+ില്‍=ു്
+ിനെ=ു്
+ിന്റെ=ു് 
+യായി=
+#അ/ഇകാരത്തിലവസാനിക്കുന്ന ക്രിയകള്‍
+താണു്=
+താണ്=
+കള്‍=
+യാകാന്‍=" ആകാന്‍"
+യുള്ള=" ഉള്ള"
+ീകരിച്ചതു്=ിയാക്കുക
+#ഉകാരത്തിലവസാനിക്കുന്ന ക്രിയകള്‍
+ുണ്ടായ= ു് ഉണ്ടായ
+ുണ്ടാവുക= ു് ഉണ്ടാവുക
+#ത്തുക എന്നതില്‍ അവസാനിക്കുന്ന ക്രിയകള്‍= ഉണര്‍ത്തുക, പകര്‍ത്തുക
+ത്താറുണ്ടു്= ത്തുക ഉണ്ടു്
+ത്താറുണ്ട്= ത്തുക ഉണ്ടു്
+ത്തിയിട്ടുണ്ടു്= ത്തുക ഉണ്ടു്
+ത്തുയിട്ടുണ്ട്=ത്തുക ഉണ്ടു്
+ത്തിയില്ല=ത്തുക ഇല്ല
+ത്തുമോ=ത്തുക
+ത്തി=ത്തുക
+#തുക എന്നതില്‍ അവസാനിക്കുന്ന ക്രിയകള്‍ = കരുതുക. പൊരുതുക
+തി=തുക
+താറുണ്ടു്=തുക ഉണ്ടു്
+താറുണ്ട്=തുക ഉണ്ടു്
+തിയില്ല=തുക ഇല്ല
+#ക്കുക എന്നതില്‍ അവസാനിക്കുന്ന ക്രിയകള്‍ = സഞ്ചരിക്കുക, ഉറക്കുക പറക്കുക
+ക്കാന്‍=ക്കുക
+ക്കാറാണു്= ക്കുക ആണു്
+ക്കാറാണ്=ക്കുക ആണു്
+ക്കി=ക്കുക
+യ്ക്കാനായി=യ്ക്കാന്‍ ആവുക
+ക്കാനായി=ക്കാന്‍ ആവുക
+ച്ചു=യ്ക്കുക
+ിച്ച്=ിയ്ക്കുക
+ിച്ചു്=ിയ്ക്കുക
+റന്നു്=റക്കുക
+റന്ന്=റക്കുക
+ന്നിട്ടുണ്ടു്=ക്കുക ഉണ്ടു്
+ന്നിട്ടുണ്ട്=ക്കുക ഉണ്ടു്
+#കുക എന്നതില്‍ അവസാനിക്കുന്ന ക്രിയകള്‍= പടരുക, തുടരുക, വിടരുക
+ര്‍ന്നു്=രുക
+ര്‍ന്നു=രുക
+ര്‍ന്നില്ല=രുക ഇല്ല
+രാനായി=രുക ആവുക
+രുന്നതിനു=രുക
+ടരും=ടരുക
+#ങ്ങുക എന്നതില്‍ അവസാനിക്കുന്ന ക്രിയകള്‍=കറങ്ങുക
+ങ്ങന്നതിനു്=ങ്ങുക
+ക്കം=ക്കുക
+#msc
+യാണിവ=" ആണു് ഇവ"
+യുള്ളവ=" ഉള്ള അവ"
+യുള്ളതു്=" ഉള്ള അതു്"
+ാക്കപ്പെടുന്ന=ു് ആക്കുക പെടുക"
+ായിരിക്കും=" ു് ആയിരിക്കും"
+യാണുണ്ടാവുക=" ഉണ്ടാവുക"
+ളാണുള്ളത്="ള്‍ ആണു് ഉള്ളതു്"
+യുപയോഗിച്ചു്=ു് ഉപയോഗിക്കുക
+ുപയോഗിച്ചു്=ു് ഉപയോഗിക്കുക
+യുപയോഗിച്ച്=ു് ഉപയോഗിക്കുക
+ുപയോഗിച്ച്=ു് ഉപയോഗിക്കുക
diff --git a/silpa/modules/paralperu/paralperu.py b/silpa/modules/paralperu/paralperu.py
new file mode 100644
index 0000000..b5398c4
--- /dev/null
+++ b/silpa/modules/paralperu/paralperu.py
@@ -0,0 +1,139 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Paralperu
+# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+
+     
+class Paralperu:
+	def paralperu(self,text):
+		result = ""
+		text=unicode(text)
+		index = len(text) - 1
+		while index >= 0:
+			if  ((index)>= 0 ) :
+				if (text[index]== '്'):
+					index = index - 2 #skip the letter before chandrakkala
+					continue
+			if (text[index] == u'ക'):
+				result = result + '1'
+			if (text[index]== u'ട'):
+				result = result + '1'
+			if (text[index]== u'പ'):
+				result = result + '1'
+			if (text[index]== u'യ'):
+				result = result + '1'
+			if (text[index]== u'ഖ'):
+				result = result + '2'
+			if (text[index]== u'ഠ'):
+				result = result + '2'
+			if (text[index]== u'ഫ'):
+				result = result + '2'
+			if (text[index]== u'ര'):
+				result = result + '2'
+			if (text[index]== u'ഗ'):
+				result = result + '3'
+			if (text[index]== u'ഡ'):
+				result = result + '3'
+			if (text[index]== u'ബ'):
+				result = result + '3'
+			if (text[index]== u'ല'):
+				result = result + '3'
+			if (text[index]== u'ഘ'):
+				result = result + '4'
+			if (text[index]== u'ഢ'):
+				result = result + '4'
+			if (text[index]== u'ഭ'):
+				result = result + '4'
+			if (text[index]== u'വ'):
+				result = result + '4'
+			if (text[index]== u'ങ'):
+				result = result + '5'
+			if (text[index]== u'ണ'):
+				result = result + '5'
+			if (text[index]== u'മ'):
+				result = result + '5'
+			if (text[index]== u'ശ'):
+				result = result + '5'
+			if (text[index]== u'ച'):
+				result = result + '6'
+			if (text[index]== u'ത'):
+				result = result + '6'
+			if (text[index]== u'ഷ'):
+				result = result + '6'
+			if (text[index]== u'ഛ'):
+				result = result + '7'
+			if (text[index]== u'ഥ'):
+				result = result + '7'
+			if (text[index]== u'സ'):
+				result = result + '7'
+			if (text[index]== u'ജ'):
+				result = result + '8'
+			if (text[index]== u'ദ'):
+				result = result + '8'
+			if (text[index]== u'ഹ'):
+				result = result + '8'
+			if (text[index]== u'ഝ'):
+				result = result + '9'
+			if (text[index]== u'ധ'):
+				result = result + '9'
+			if (text[index]== u'ള'):
+				result = result + '9'
+			if (text[index]== u'ഞ'):
+				result = result + '0'
+			if (text[index]== u'ന'):
+				result = result + '0'
+			if (text[index]== u'ഴ'):
+				result = result + '0'
+			if (text[index]== u'റ'):
+				result = result + '0'
+			if (text[index]== u'അ'):
+				result = result + '0'
+			if (text[index]== u'ആ'):
+				result = result + '0'
+			if (text[index]== u'ഇ'):
+				result = result + '0'
+			if (text[index]== u'ഈ'):
+				result = result + '0'
+			if (text[index]== u'ഉ'):
+				result = result + '0'
+			if (text[index]== u'ഊ'):
+				result = result + '0'
+			if (text[index]== u'ഋ'):
+				result = result + '0'
+			if (text[index]== u'ൠ'):
+				result = result + '0'
+			if (text[index]== u'ഌ'):
+				result = result + '0'
+			if (text[index]== u'ൡ'):
+				result = result + '0'
+			if (text[index]== u'എ'):
+				result = result + '0'
+			if (text[index]== u'ഏ'):
+				result = result + '0'
+			if (text[index]== u'ഒ'):
+				result = result + '0'
+			if (text[index]== u'ഓ'):
+				result = result + '0'
+			if (text[index]== u'ഔ'):
+				result = result + '0'
+			index = index-1
+		return result
+
diff --git a/silpa/modules/payyans/maps/ambili.map b/silpa/modules/payyans/maps/ambili.map
new file mode 100644
index 0000000..22fa9f0
--- /dev/null
+++ b/silpa/modules/payyans/maps/ambili.map
@@ -0,0 +1,191 @@
+#Ambili Font map for Payyans
+#http://download.savannah.gnu.org/releases/smc/payyans
+#Copyright 2009 Zyxware (www.zyxware.com)
+#Copyright 2008 SMC (www.smc.org.in)
+#Licensed under GPLv3
+#Contact smc-discuss@googlegroups.com for bug reports
+w=ം
+x=ഃ
+A=അ
+B=ആ
+C=ഇ
+Cu=ഈ
+D=ഉ
+Du=ഊ
+E=ഋ
+\p=ഌ
+F=എ
+G=ഏ
+sF=ഐ
+H=ഒ
+Hm=ഓ
+Hu=ഔ
+I=ക
+J=ഖ
+K=ഗ
+L=ഘ
+M=ങ
+N=ച
+O=ഛ
+P=ജ
+Q=ഝ
+R=ഞ
+S=ട
+T=ഠ
+U=ഡ
+V=ഢ
+W=ണ
+X=ത
+Y=ഥ
+Z=ദ
+[=ധ
+\=ന
+]=പ
+^=ഫ
+_=ബ
+`=ഭ
+a=മ
+b=യ
+c=ര
+d=റ
+e=ല
+f=ള
+g=ഴ
+h=വ
+i=ശ
+j=ഷ
+k=സ
+l=ഹ
+m=ാ
+n=ി
+o=ീ
+p=ു
+q=ൂ
+r=ൃ
+s=െ
+t=േ
+ss=ൈ
+sm=ൊ
+tm=ോ
+su=ൌ
+v=്‌
+u=ൗ
+¡=ത്ഥ
+¢=ക്ല
+£=ക്ഷ
+€=ഗ്ഗ
+¥=ഗ്ല
+Š=ങ്ക
+§=ങ്ങ
+š=ണ്‍ 
+©=ഞ്ച
+ª=ഞ്ഞ
+«=ട്ട
+¬=ല്‍  
+=ണ്ട
+®=ച്ച
+¯=സ്ഥ
+°=ക്ക
+±=ദ്ദ
+²=ദ്ധ
+³=ന്‍
+Ž=ന്ത
+µ=ന്ദ
+¶=ങ്ക
+·=ന്മ
+ž=പ്പ
+¹=പ്ലശ്ശ 
+º=മ്പ
+»=ശ്ശ 
+Œ=മ്പ
+œ=മ്മ
+Ÿ=മ്ല
+¿=ര്‍
+À=ഹ്ല
+Á=റ്റ
+Â=ണ്ണ
+Ã=സ്റ്റ
+Ä=ള്‍
+Å=ള്ള
+Æ=ണ്ണ
+Ç=ശ്ല
+È=ശ്ശ
+É=സ്ല
+Ê=സ്സ
+Ë=ഹ്ല
+Ì=സ്റ്റ
+Í=ഡ്ഡ
+Î=ക്ട
+Ï=ബ്ധ
+Ð=ബ്ദ
+Ñ=ച്ഛ
+Ò=ഹ്മ
+Ó=ഹ്ന
+Ô=ന്ധ
+Õ=ത്സ
+Ö=ജ്ജ
+×=ണ്മ
+Ø=ത്ത
+Ù=ന്ഥ
+Ú=ജ്ഞ
+Û=ത്ഭ
+Ü=ഗ്മ
+Ý=ശ്ച
+Þ=ണ്ഡ
+ß=ങ്ങ
+à=ക്ത
+á=ഗ്ന
+â=ന്റ
+ã=ഷ്ട
+ä=റ്റ
+å=ന്
+ó=ന്ന
+y=്യ
+z=്വ
+{=്ര
+ð=ല്‍
+ï=ണ്ട
+ñ=ല്ല
+ò=ന്മ
+´=ട്ട
+¸=പ്പ
+¨=ണ്‍ 
+¦=ങ്ക
+¼=മ്പ
+½=മ്മ
+¤=ഗ്ഗ
+ô=ഞ്ച
+þ=-
+∂=ന്ന
+-=
+≤=ദ്ധ
+≥=ന്‍ 
+ƒ=ള്‍ 
+˛=-
+Ω=മ്മ
+‰=റ്റ
+ÿ=സ്ഥ
+›=ശ്ച
+∞=ത്ഥ
+≠=ണ്ട
+‡=ക്ത
+√=ല്ല
+∏=പ്പ
+≈=ള്ള
+π=പ്ല
+ø=യ്യ
+ﬂ=ത്മ
+∑=ന്മ
+÷=ജ്ജ
+∆=വ്വ
+™=ഞ്ഞ
+—=ച്ഛ
+‚=ന്റ
+¥=ന്ത
+ =സ്സ
+⁄=ജ്ഞ
+•=ഗ്ല
+’=ത്സ
+‘=ന്ധ
+...=സ്ല
+“=ഹ്മ
diff --git a/silpa/modules/payyans/maps/indulekha.map b/silpa/modules/payyans/maps/indulekha.map
new file mode 100644
index 0000000..126031d
--- /dev/null
+++ b/silpa/modules/payyans/maps/indulekha.map
@@ -0,0 +1,160 @@
+#Give the rules in the following format
+w=ം
+x=ഃ
+A=അ
+B=ആ
+C=ഇ
+Cu=ഈ
+D=ഉ
+Du=ഊ
+E=ഋ
+\p=ഌ
+F=എ
+G=ഏ
+sF=ഐ
+H=ഒ
+Hm=ഓ
+Hu=ഔ
+I=ക
+J=ഖ
+K=ഗ
+L=ഘ
+M=ങ
+N=ച
+O=ഛ
+P=ജ
+Q=ഝ
+R=ഞ
+S=ട
+T=ഠ
+U=ഡ
+V=ഢ
+W=ണ
+X=ത
+Y=ഥ
+Z=ദ
+[=ധ
+\=ന
+]=പ
+^=ഫ
+_=ബ
+`=ഭ
+õ=ഭ
+a=മ
+b=യ
+c=ര
+d=ല
+e=വ
+f=ശ
+g=ഷ
+h=സ
+i=ഹ
+j=ള
+k=ഴ
+l=റ
+m=്
+n=ാ
+o=ി
+p=ീ
+q=ു
+r=ൂ
+s=ൃ
+t=െ
+u=േ
+ss=ൈ
+sm=ൊ
+tm=ോ
+su=ൌ
+v=ൌ
+¡=ക്ക
+¡=ക്ക
+¢=ക്ല
+£=ക്ഷ
+€=ഗ്ഗ
+¥=ദ്ദ
+Š=ങ്ക
+§=ങ്ങ
+š=ച്ച
+©=ഞ്ച
+ª=ദ്ധ
+«=ട്ട
+¬=ണ്‍
+=ണ്ട
+®=ണ്ണ
+¯=ത്ത
+°=ന്‍
+±=ര്‍
+²=ല്‍
+³=ള്‍
+Ž=ന്ത
+µ=ന്ദ
+¶=ന്ന
+·=ന്മ
+ž=പ്പ
+¹=ങ്ക
+º=ങ്ങ
+»=ച്ച
+Œ=മ്പ
+œ=മ്മ
+Ÿ=മ്ല
+¿=ത്ത
+À=ന്ദ
+Á=ന്ന
+Â=ന്റ
+Ã=പ്പ
+Ä=മ്പ
+Å=മ്മ
+Æ=വ്വ
+Ç=യ്യ
+È=ല്ല
+É=ള്ള
+Ê=റ്റ
+Ë=ഹ്ല
+Ì=റ്റ
+Í=ഡ്ഡ
+Î=ക്ട
+Ï=ബ്ധ
+Ð=ന്ത
+Ñ=ച്ഛ
+Ò=ഹ്മ
+Ó=ഹ്ന
+Ô=ന്ധ
+Õ=ഞ്ച
+Ö=ജ്ജ
+×=ണ്മ
+Ø=സ്ഥ
+Ù=സ്ഥ
+Ú=ജ്ഞ
+Û=ത്ഭ
+Ü=ഗ്മ
+Ý=ശ്ച
+Þ=ണ്ഡ
+ß=ത്മ
+à=ക്ത
+á=ഗ്ന
+â=ന്റ
+ã=ഷ്ട
+ä=റ്റ
+å=ന്
+ó=ന്ന
+y=്യ
+z=്വ
+{=്ര
+}=്ര
+ð=ല്‍
+ï=ണ്ട
+ñ=ല്ല
+ò=ന്മ
+´=ന്ത
+¸=ക്ഷ
+¨=ഓ
+¦=ങ്ക
+|=്വ
+¼=ഞ്ഞ
+½=ട്ട
+¤=ഈ
+ô=സ്സ
+þ=-
+¾=ണ്ട
+î=ന്മ
+$=സ്റ്റ
diff --git a/silpa/modules/payyans/maps/karthika.map b/silpa/modules/payyans/maps/karthika.map
new file mode 100644
index 0000000..c4e1765
--- /dev/null
+++ b/silpa/modules/payyans/maps/karthika.map
@@ -0,0 +1,157 @@
+#Give the rules in the following format
+w=ം
+x=ഃ
+A=അ
+B=ആ
+C=ഇ
+Cu=ഈ
+D=ഉ
+Du=ഊ
+E=ഋ
+\p=ഌ
+F=എ
+G=ഏ
+sF=ഐ
+H=ഒ
+Hm=ഓ
+Hu=ഔ
+I=ക
+J=ഖ
+K=ഗ
+L=ഘ
+M=ങ
+N=ച
+O=ഛ
+P=ജ
+Q=ഝ
+R=ഞ
+S=ട
+T=ഠ
+U=ഡ
+V=ഢ
+W=ണ
+X=ത
+Y=ഥ
+Z=ദ
+[=ധ
+\=ന
+]=പ
+^=ഫ
+_=ബ
+`=ഭ
+a=മ
+b=യ
+c=ര
+d=റ
+e=ല
+f=ള
+g=ഴ
+h=വ
+i=ശ
+j=ഷ
+k=സ
+l=ഹ
+m=ാ
+n=ി
+o=ീ
+p=ു
+q=ൂ
+r=ൃ
+s=െ
+t=േ
+ss=ൈ
+sm=ൊ
+tm=ോ
+su=ൌ
+v=്‌
+u=ൗ
+¡=ക്ക
+¡=ക്ക
+¢=ക്ല
+£=ക്ഷ
+€=ഗ്ഗ
+¥=ഗ്ല
+Š=ങ്ക
+§=ങ്ങ
+š=ച്ച
+©=ഞ്ച
+ª=ഞ്ഞ
+«=ട്ട
+¬=ണ്‍
+=ണ്ട
+®=ണ്ണ
+¯=ത്ത
+°=ത്ഥ
+±=ദ്ദ
+²=ദ്ധ
+³=ന്‍
+Ž=ന്ത
+µ=ന്ദ
+¶=ന്ന
+·=ന്മ
+ž=പ്പ
+¹=പ്ല
+º=ബ്ബ
+»=ബ്ല
+Œ=മ്പ
+œ=മ്മ
+Ÿ=മ്ല
+¿=യ്യ
+À=ര്‍
+Á=റ്റ
+Â=ല്‍
+Ã=ല്ല
+Ä=ള്‍
+Å=ള്ള
+Æ=വ്വ
+Ç=ശ്ല
+È=ശ്ശ
+É=സ്ല
+Ê=സ്സ
+Ë=ഹ്ല
+Ì=സ്റ്റ
+Í=ഡ്ഡ
+Î=ക്ട
+Ï=ബ്ധ
+Ð=ബ്ദ
+Ñ=ച്ഛ
+Ò=ഹ്മ
+Ó=ഹ്ന
+Ô=ന്ധ
+Õ=ത്സ
+Ö=ജ്ജ
+×=ണ്മ
+Ø=സ്ഥ
+Ù=ന്ഥ
+Ú=ജ്ഞ
+Û=ത്ഭ
+Ü=ഗ്മ
+Ý=ശ്ച
+Þ=ണ്ഡ
+ß=ത്മ
+à=ക്ത
+á=ഗ്ന
+â=ന്റ
+ã=ഷ്ട
+ä=റ്റ
+å=ന്
+ó=ന്ന
+y=്യ
+z=്വ
+{=്ര
+##-- from here added by nishan ##
+ð=ല്‍
+ï=ണ്ട
+ñ=ല്ല
+ò=ന്മ
+´=ന്ത
+¸=പ്പ
+¨=ച്ച
+¦=ങ്ക
+¼=മ്പ
+½=മ്മ
+¤=ഗ്ഗ
+ô=ഞ്ച
+þ=-
+∂=ന്ന
+
diff --git a/silpa/modules/payyans/maps/revathi.map b/silpa/modules/payyans/maps/revathi.map
new file mode 100644
index 0000000..76068de
--- /dev/null
+++ b/silpa/modules/payyans/maps/revathi.map
@@ -0,0 +1,131 @@
+# ML-Revathi mapping to Unicode
+# Give the rules in the following format
+A=അ
+B=ആ
+C=ഇ
+D=ഉ
+E=ഋ
+F=എ
+G=ഏ
+H=ഒ
+I=ക
+J=ഖ
+K=ഗ
+L=ഘ
+M=ങ
+N=ച
+O=ഛ
+P=ജ
+Q=ഝ
+R=ഞ
+S=ട
+T=ഠ
+U=ഡ
+V=ഢ
+W=ണ
+X=ത
+Y=ഥ
+Z=ദ
+[=ധ
+\=ന
+]=പ
+^=ഫ
+_=ബ
+`=ഭ
+a=മ
+b=യ
+c=ര
+d=റ
+e=ല
+f=ള
+g=ഴ
+h=വ
+i=ശ
+j=ഷ
+k=സ
+l=ഹ
+m=ാ
+n=ി
+o=ീ
+p=ു
+q=ൂ
+r=ൃ
+s=െ
+t=േ
+u=ൗ
+v=്
+w=ം
+x=ഃ
+y=്യ
+z=്വ
+{=്ര
+‚=ന്റ
+ƒ=ള്‍
+„=ഷ്ട
+…=സ്ല
+‡=ക്ത
+‰=റ്റ
+‹=ഗ്മ
+Œ=ക്ട
+o=ീ
+™=ഞ്ഞ
+›=ശ്ച
+œ=ബ്ധ
+Ÿ=ന്ഥ
+¡=ററ
+¢=ക്ല
+£=ക്ഷ
+¤=ത്ഭ
+¥=ന്ത
+§=ഗ്ഗ
+¨=ണ്‍
+©=ഞ്ച
+ª=ബ്ല
+«=ശ്ല
+®=ച്ച
+°=ക്ക
+±=ദ്ദ
+´=ട്ട
+µ=ന്ദ
+·=ഗ്ന
+º=മ്പ
+»=ശ്ശ
+¿=ര്‍
+À=ഹ്ല
+Â=നു
+Ã=സ്റ്റ
+Æ=ണ്ണ
+Õ=ഡ്ഡ
+Ø=ത്ത
+ß=ങ്ങ
+æ=മ്ല
+÷=ജ്ജ
+ø=യ്യ
+ÿ=സ്ഥ
+ss=ൈ
+sm=ൊ
+tm=ോ
+su=ൌ
+’=ത്സ
+∂=ന്ന
+¬=ല്‍
+¶=ങ്ക
+Ω=മ്മ
+Δ=വ്വ
+π=പ്ല
+∏=പ്പ
+∑=ന്മ
+∕=ജ്ഞ
+√=ല്ല
+∞=ത്ഥ
+∫=ബ്ബ
+≈=ള്ള
+≠=ണ്ട
+≤=ദ്ധ
+≥=ന്‍
+–=ബ്ദ
+—=ച്ഛ
+‘=ന്ധ
+“=ഹ്മ
+”=ഹ്ന
+•=ഗ്ല
diff --git a/silpa/modules/payyans/maps/template.map b/silpa/modules/payyans/maps/template.map
new file mode 100644
index 0000000..dba911f
--- /dev/null
+++ b/silpa/modules/payyans/maps/template.map
@@ -0,0 +1,189 @@
+! = ? 
+" = ? 
+# = ? 
+$ = ? 
+% = ? 
+& = ? 
+' = ? 
+( = ? 
+) = ? 
+* = ? 
++ = ? 
+, = ? 
+- = ? 
+. = ? 
+/ = ? 
+0 = ? 
+1 = ? 
+2 = ? 
+3 = ? 
+4 = ? 
+5 = ? 
+6 = ? 
+7 = ? 
+8 = ? 
+9 = ? 
+: = ? 
+; = ? 
+< = ? 
+= = ? 
+> = ? 
+? = ? 
+@ = ? 
+A = ? 
+B = ? 
+C = ? 
+D = ? 
+E = ? 
+F = ? 
+G = ? 
+H = ? 
+I = ? 
+J = ? 
+K = ? 
+L = ? 
+M = ? 
+N = ? 
+O = ? 
+P = ? 
+Q = ? 
+R = ? 
+S = ? 
+T = ? 
+U = ? 
+V = ? 
+W = ? 
+X = ? 
+Y = ? 
+Z = ? 
+[ = ? 
+\ = ? 
+] = ? 
+^ = ? 
+_ = ? 
+` = ? 
+a = ? 
+b = ? 
+c = ? 
+d = ? 
+e = ? 
+f = ? 
+g = ? 
+h = ? 
+i = ? 
+j = ? 
+k = ? 
+l = ? 
+m = ? 
+n = ? 
+o = ? 
+p = ? 
+q = ? 
+r = ? 
+s = ? 
+t = ? 
+u = ? 
+v = ? 
+w = ? 
+x = ? 
+y = ? 
+z = ? 
+{ = ? 
+| = ? 
+} = ? 
+~ = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
+� = ? 
diff --git a/silpa/modules/payyans/payyans.py b/silpa/modules/payyans/payyans.py
new file mode 100644
index 0000000..d1de143
--- /dev/null
+++ b/silpa/modules/payyans/payyans.py
@@ -0,0 +1,275 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Payyans Ascii to Unicode Convertor
+# Copyright 2008-2009 Santhosh Thottingal <santhosh.thottingal@gmail.com>,
+# Nishan Naseer <nishan.naseer@gmail.com>, Manu S Madhav <manusmad@gmail.com>,
+# Rajeesh K Nambiar <rajeeshknambiar@gmail.com>
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+
+'''
+പയ്യന്‍ ആളു തരികിടയാകുന്നു. ആസ്കി വേറൊരു തരികിടയും.
+തരികിടയെ തരികിടകൊണ്ടു നേരിടുന്നതാണു് ബുദ്ധി.
+അമേരിക്കാ-ഇറാഖ് യുദ്ധം താഴെപ്പറയും വിധമാകുന്നു.
+'''
+
+'''ആവശ്യത്തിനുള്ള കോപ്പുകള്‍ കൂട്ടുക '''
+import sys #കുന്തം
+import codecs #കൊടച്ചക്രം
+import os #ശീലക്കുട
+from optparse import OptionParser #മുറുക്കാന്‍ചെല്ലം
+ 
+'''പയ്യന്റെ ക്ലാസ് ഉന്നതകുലമാകുന്നു. ച്ചാല്‍ ആഢ്യന്‍ തന്നെ. ഏ ക്ലാസ് പയ്യന്‍...!'''
+class Payyans:
+
+	def __init__(self):
+		self.input_filename =""
+		self.output_filename=""
+		self.mapping_filename=""
+		self.rulesDict=None
+		self.pdf=0
+		
+	def word2ASCII(self, unicode_text):
+		index = 0
+		prebase_letter = ""
+		ascii_text=""
+		self.direction = "u2a"
+		self.rulesDict = self.LoadRules()
+		while index < len(unicode_text):
+			'''കൂട്ടക്ഷരങ്ങള്‍ക്കൊരു കുറുക്കുവഴി'''
+			for charNo in [3,2,1]:
+				letter = unicode_text[index:index+charNo]
+				if letter in self.rulesDict:
+					ascii_letter = self.rulesDict[letter]
+					letter = letter.encode('utf-8')
+					'''കിട്ടിയ അക്ഷരങ്ങളുടെ അപ്പുറത്തും ഇപ്പുറത്തും സ്വരചിഹ്നങ്ങള്‍ ഫിറ്റ് ചെയ്യാനുള്ള ബദ്ധപ്പാട്'''
+					if letter == 'ൈ':	# പിറകില്‍ രണ്ടു സാധനം പിടിപ്പിക്കുക
+						ascii_text = ascii_text[:-1] + ascii_letter*2 + ascii_text[-1:]
+					elif (letter == 'ോ') | (letter == 'ൊ') | (letter == 'ൌ'):		#മുമ്പിലൊന്നും പിറകിലൊന്നും
+						ascii_text = ascii_text[:-1] + ascii_letter[0] + ascii_text[-1:] + ascii_letter[1]
+					elif (letter == 'െ') | (letter == 'േ') |(letter == '്ര'):		#പിറകിലൊന്നുമാത്രം
+						ascii_text = ascii_text[:-1] + ascii_letter + ascii_text[-1:]
+					else:
+						ascii_text = ascii_text + ascii_letter						
+					index = index+charNo
+					break
+				else:
+					if(charNo==1):
+						index=index+1
+						ascii_text = ascii_text + letter
+						break;
+					'''നോക്കിയിട്ടു കിട്ടുന്നില്ല ബായി'''				
+					ascii_letter = letter
+					#ascii_text = ascii_text + ascii_letter
+					#index = index+1
+
+		return ascii_text
+		
+	def Uni2Ascii(self):
+		'''പണിതുടങ്ങട്ടെ'''
+		if self.input_filename :
+			uni_file = codecs.open(self.input_filename, encoding = 'utf-8', errors = 'ignore')
+		else :
+			uni_file = codecs.open(sys.stdin, encoding = 'utf-8', errors = 'ignore')			
+		text = ""
+		if self.output_filename :
+			output_file = codecs.open(self.output_filename, encoding = 'utf-8', errors = 'ignore',  mode='w+')			
+		while 1:
+   			text =uni_file.readline()
+			if text == "":
+				break
+			ascii_text = ""	
+			# ഹീന ജാതിയിലേയ്ക്ക് തരം താഴ്ത്ത്വാ !
+			ascii_text = self.word2ASCII(text)
+									
+			if self.output_filename :
+				output_file.write(ascii_text)
+			else:
+				print ascii_text.encode('utf-8')
+		''' പയ്യന്‍ നല്ലോരു യൂണിക്കോട് ഫയലില്‍ കേറി നെരങ്ങി ആസ്ക്കിയാക്കി. ദൈവമേ, ഈ പയ്യനു നല്ലബുദ്ധി തോന്നിക്കണേ... '''
+		return 0
+		
+	def word2Unicode(self, ascii_text):
+		index = 0
+		post_index = 0
+		prebase_letter = ""
+		postbase_letter = ""	# "‌‌്യ", "്വ"
+		unicode_text = ""
+		next_ucode_letter = ""
+		self.direction="a2u"
+		self.rulesDict = self.LoadRules()
+		while index < len(ascii_text):
+			for charNo in [2,1]:
+				letter = ascii_text[index:index+charNo]
+				if letter in self.rulesDict:
+					unicode_letter = self.rulesDict[letter]
+					if(self.isPrebase(unicode_letter)):	#സ്വരചിഹ്നമാണോ?
+						prebase_letter = unicode_letter
+					else:					#സ്വരചിഹ്നമല്ല
+						#എങ്കില്‍ വ്യഞ്ജനത്തിനു ശേഷം പോസ്റ്റ്-ബേസ് ഉണ്ടോ എന്നു നോക്കൂ
+						post_index = index+charNo
+						if post_index < len(ascii_text):
+							letter = ascii_text[post_index]
+							if letter in self.rulesDict:
+								next_ucode_letter = self.rulesDict[letter]
+								if self.isPostbase(next_ucode_letter):
+									postbase_letter = next_ucode_letter
+									index = index + 1
+						if  ((unicode_letter.encode('utf-8') == "എ") |
+						    ( unicode_letter.encode('utf-8') == "ഒ" )):
+							unicode_text = unicode_text + postbase_letter + self.getVowelSign(prebase_letter , unicode_letter)
+						else:
+							unicode_text = unicode_text + unicode_letter + postbase_letter + prebase_letter
+						prebase_letter=""
+						postbase_letter=""
+					index = index + charNo
+					break
+				else:
+					if charNo == 1:
+						unicode_text = unicode_text + letter
+						index = index + 1
+						break
+					unicode_letter = letter
+		return unicode_text	# മതം മാറ്റി തിരിച്ചു കൊടുക്ക്വാ ! 
+	
+	def Ascii2Uni(self):
+		if self.pdf :
+			command = "pdftotext '" + self.input_filename +"'"
+			process = os.popen(command, 'r')
+			status = process.close()
+			if status:
+				print "The input file is a PDF file. To convert this the  pdftotext  utility is required. "
+				print "This feature is available only for GNU/Linux Operating system."
+				'''ഊഹും. കൊന്നാലും ഇനി മുന്നോട്ടില്ല. മുന്നില്‍ മറ്റവനാകുന്നു. ഏതു്? '''
+				return 1	# Error - no pdftotext !
+			else:
+				self.input_filename =  os.path.splitext(self.input_filename)[0] + ".txt"
+		if self.input_filename :
+			ascii_file = codecs.open(self.input_filename, encoding = 'utf-8', errors = 'ignore')
+		else :
+			ascii_file = codecs.open(sys.stdin, encoding = 'utf-8', errors = 'ignore')			
+		
+		text = ""
+		if self.output_filename :
+			output_file = codecs.open(self.output_filename, encoding = 'utf-8', errors = 'ignore',  mode='w+')			
+	
+		'''സത്യമുള്ളടത്തോളം... അതുകൊണ്ടു തന്നെ ടെര്‍മിനേഷന്‍ ഉറപ്പു്'''	
+		while 1:
+   			text =ascii_file.readline()
+			if text == "":
+				break
+			unicode_text = ""
+			''' അങ്ങട്ട് മതം മാറ്റ്വാ... ആസ്കിതനും നാസ്തികനും ഒന്നന്നെ! '''
+			unicode_text = self.word2Unicode(text)
+			
+			if self.output_filename :
+				output_file.write(unicode_text)
+			else:
+				print unicode_text.encode('utf-8')
+
+		''' പയ്യന്റെ അവതാരോദ്ദേശ്യം പൂര്‍ണ്ണമായിരിക്കുന്നു. ഇനി മടക്കം. റിട്ടേണ്‍...! '''
+		return 0
+
+	def getVowelSign(self, vowel_letter, vowel_sign_letter):
+		vowel=  vowel_letter.encode('utf-8')
+		vowel_sign=  vowel_sign_letter.encode('utf-8')
+		if vowel == "എ":
+			if vowel_sign == "െ":
+				return "ഐ"
+		if vowel == "ഒ":
+			if vowel_sign == "ാ":
+				return "ഓ"
+			if vowel_sign =="ൗ":
+				return "ഔ"
+		return (vowel_letter+ vowel_sign_letter)
+
+	def isPrebase(self, letter):
+		 '''
+		 ഇതെന്തിനാന്നു ചോദിച്ചാ, ഈ അക്ഷരങ്ങളുടെ ഇടതു വശത്തെഴുതുന്ന സ്വര ചിഹ്നങ്ങളുണ്ടല്ലോ?
+		 അവ ആസ്കി തരികിടയില്‍ എഴുതുന്നതു് ഇടതു വശത്തു തന്നെയാ. യൂണിക്കോഡില്‍ അക്ഷരത്തിനു ശേഷവും
+		 അപ്പൊ ആ വക സംഭവങ്ങളെ തിരിച്ചറിയാനാണു് ഈ സംഭവം.
+		 "തരികിട തരികിടോ ധീംതരികിട" (തരികിട തരികിടയാല്‍)  എന്നു പയ്യന്റെ ഗുരു പയ്യഗുരു പയ്യെ മൊഴിഞ്ഞിട്ടുണ്ടു്. 
+		 '''
+		 unicode_letter = letter.encode('utf-8')
+		 if(   ( unicode_letter == "േ"  ) | (   unicode_letter ==  "ൈ" ) |   ( unicode_letter ==  "ൊ" ) 	| ( unicode_letter ==  "ോ"  ) |  ( unicode_letter == "ൌ"  )
+		 			|  ( unicode_letter == "്ര"  )  |  ( unicode_letter == "െ"  ) 
+		 			 ):
+			return True #"ഇതു സത്യം... അ...സത്യം.... അസത്യം...!"
+		 else:
+			return False
+			
+	def isPostbase(self, letter):
+		'''
+		"ക്യ" എന്നതിലെ "്യ", "ക്വ" എന്നതിലെ "്വ" എന്നിവ പോസ്റ്റ്-ബേസ് ആണ്.
+		"ത്യേ" എന്നത് ആസ്കിയില്‍ "ഏ+ത+്യ" എന്നാണ് എഴുതുന്നത്. അപ്പോള്‍ വ്യഞ്ജനം കഴിഞ്ഞ് പോസ്റ്റ്-ബേസ്
+		ഉണ്ടെങ്കില്‍ വ്യഞ്ജനം+പോസ്റ്റ്-ബേസ് കഴിഞ്ഞേ പ്രീ-ബേസ് ചേര്‍ക്കാവൂ! ഹൊ, പയ്യന്‍ പാണിനീശിഷ്യനാണ്!!
+		'''
+		unicode_letter = letter.encode('utf-8')
+		if ( (unicode_letter == "്യ") | (unicode_letter == "്വ") ):
+			return True
+		else:
+			return False
+					
+	def LoadRules(self):	
+		'''
+		ഈ സംഭവമാണു് മാപ്പിങ്ങ് ഫയല്‍ എടുത്തു് വായിച്ചു പഠിക്കുന്നതു്.
+		'''
+		if(self.rulesDict):
+			return self.rulesDict
+		rules_dict = dict()
+		line = []
+		line_number = 0
+		rules_file = codecs. open(self.mapping_filename,encoding='utf-8', errors='ignore')
+		while 1:
+			''' ലൈന്‍ നമ്പര്‍ , മാപ്പിങ്ങ് ഫയലില്‍ തെറ്റുണ്ടെങ്കില്‍ പറയാന്‍ ആവശ്യാണു് '''
+			line_number = line_number +1 
+   			text = unicode( rules_file.readline())
+			if text == "":
+			      break
+			'''കമന്റടിച്ചേ മത്യാവൂന്നു വെച്ചാ ആവാം. ഒട്ടും മുഷിയില്ല്യ'''      
+			if text[0] == '#': 
+			      continue 
+			      ''' കമന്റടി പതിവുപോലെ മൈന്റ് ചെയ്യണ്ട ഒന്നും കണ്ടില്യാ കേട്ടില്യാന്നു വെച്ചു നടന്നോളൂ(മനസ്സില്‍ ചിരിച്ചോളൂ) ''' 
+			line = text.strip()
+			if(line == ""):
+				  continue 
+				  '''ലൈനൊന്നും ല്യാ, മോശം.. ങും പോട്ടെ. വേറെ ലൈന്‍ പിടിക്കാം'''
+			if(len(line.split("=")) != 2):
+					'''എന്തോ പ്രശ്നണ്ടു്. ന്നാ അതങ്ങടു തുറന്നു പറഞ്ഞേക്കാം'''
+					print "Error: Syntax Error in the Ascii to Unicode Map in line number ",  line_number
+				  	print "Line: "+ text
+				  	'''പരിപാടി നിര്‍ത്താം '''
+				  	return 2	# Error - Syntax error in Mapping file 
+			'''ഇടതന്‍'''				  	
+	 		lhs = line.split("=") [ 0 ]  
+	 		'''വലതന്‍'''
+	 		rhs = line.split("=") [ 1 ]  
+	 		'''ഇതിനിടക്കിനി മൂന്നാമനു സ്കോപ്പിണ്ടോ? '''
+	 		'''മറക്കാതെ ഇരിക്കട്ടെ. ആവശ്യം വരും '''
+			if self.direction == 'a2u':
+				rules_dict[lhs]=rhs
+			else:
+				rules_dict[rhs]=lhs
+		return rules_dict
+	
+
+if __name__ == "__main__":
+	'''ഒരു പയ്യന്‍ അവതരിക്കുന്നു. '''
+	rule= Payyan()
+	
diff --git a/silpa/modules/syllabalizer/syllabalizer.py b/silpa/modules/syllabalizer/syllabalizer.py
new file mode 100644
index 0000000..deed058
--- /dev/null
+++ b/silpa/modules/syllabalizer/syllabalizer.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+"""mlsplit - Split Malayalam words into letters
+
+This script splits Malayalam words into letters.
+Ref: http://tinyurl.com/3v729s
+
+
+
+Copyright (C) 2008 Baiju M <baiju.m.mail AT gmail.com>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import sys
+import re
+import codecs
+from langdetect import LangDetect
+class Syllabalizer:
+	"""Split text into syllable-like units for Malayalam, Hindi and English.
+
+	syllabalize() picks the language with LangDetect and dispatches to one of
+	the language specific methods; those can also be called directly.
+	"""
+
+	# Dependent signs (anusvara, visarga, vowel signs, virama) that always
+	# attach to the preceding character.
+	_ML_SIGNS = (
+		u'\u0d02', u'\u0d03', u'\u0d3e', u'\u0d3f', u'\u0d40', u'\u0d41',
+		u'\u0d42', u'\u0d43', u'\u0d44', u'\u0d46', u'\u0d47', u'\u0d48',
+		u'\u0d4a', u'\u0d4b', u'\u0d4c', u'\u0d4d')
+	_ML_CHANDRAKKALA = u'\u0d4d'
+	_HI_SIGNS = (
+		u'\u0902', u'\u0903', u'\u093e', u'\u093f', u'\u0940', u'\u0941',
+		u'\u0942', u'\u0943', u'\u0944', u'\u0946', u'\u0947', u'\u0948',
+		u'\u094a', u'\u094b', u'\u094c', u'\u094d')
+	_HI_CHANDRAKKALA = u'\u094d'
+
+	def _split_clusters(self, text, signs, chandrakkala):
+		"""Group the characters of text into clusters and return them as a list.
+
+		A character joins the previous cluster when it is one of signs, or when
+		the previous cluster ends with chandrakkala (virama); otherwise it
+		starts a new cluster. A sign at the very start of the text has nothing
+		to attach to and simply opens the first cluster (this used to raise
+		IndexError).
+		"""
+		clusters = []
+		for char in text:
+			if clusters and (char in signs or clusters[-1][-1] == chandrakkala):
+				clusters[-1] = clusters[-1] + char
+			else:
+				clusters.append(char)
+		return clusters
+
+	def syllabalize_ml(self, text):
+		"""Return the list of Malayalam letter clusters in text."""
+		return self._split_clusters(text, self._ML_SIGNS, self._ML_CHANDRAKKALA)
+
+	def syllabalize_hi(self, text):
+		"""Return the list of Devanagari (Hindi) letter clusters in text."""
+		return self._split_clusters(text, self._HI_SIGNS, self._HI_CHANDRAKKALA)
+
+	#Source: http://www.python-forum.org/pythonforum/viewtopic.php?f=14&t=5810#p42091
+	#Author: Cabu
+	def syllabalize_en(self, text):
+		"""Mark syllable boundaries in English text with a rule based heuristic.
+
+		Unlike the other methods this returns a single string, not a list: the
+		input padded with one space on each side, with separator characters
+		inserted at the detected boundaries ('#' between digits, '+' around the
+		"i"/"the" + vowel cuts, '_' between vowels, '-' between consonants and
+		'/' between a vowel and a following consonant group).
+		The vowel/consonant scans are case sensitive (lower case only), the
+		regular expressions are not.
+		"""
+		text = " " + text + " "
+		vowel_list       = ['a', 'e', 'i', 'o', 'u', 'y']
+		vowel_pairs      = ['ai', 'au', 'aw', 'ee','ea', 'oa', 'oi', 'ou', 'oo', 'ow', 'oy', 'uu']
+		consonant_list   = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
+		consonant_blends = ['bl', 'br', 'ch', 'chr', 'cl', 'cr', 'dr', 'fl', 'fr', 'gl', 'gr', 'kn', 'pl', 'pr',
+							'sc', 'sh', 'sk', 'sl', 'sm', 'sn', 'sp', 'spr', 'squ', 'st', 'str', 'sw',
+							'th', 'tr', 'thr', 'nt', 'wh']
+
+		# Cut numbers in digits. Matches do not overlap, so a second pass is
+		# needed to separate the digit pairs the first pass stepped over.
+		p = re.compile(r"([0-9])([0-9])", re.IGNORECASE)
+		for i in range(2):
+			text = p.sub(r"\1#\2", text)
+
+		# Cut i / vowel (- o) / consonant
+		p = re.compile(r"i([aeiuy])([bcdfghjklmnpqrstvwxz])", re.IGNORECASE)
+		text = p.sub(r"i+\1+\2", text)
+
+		# Cut the / vowel / consonant
+		p = re.compile(r"the([aeiouy])([bcdfghjklmnpqrstvwxz])", re.IGNORECASE)
+		text = p.sub(r"the+\1+\2", text)
+
+		# Cut vowel / vowel except for pairs
+		position = 0
+		while position < len(text) - 1:
+			if text[position] in vowel_list and text[position+1] in vowel_list:
+				if not (text[position:position+2] in vowel_pairs):
+					if not (text[position-1:position+3] in ["tion", "dual", "nion", "quir", "tiou"]):
+						text = text[:position+1] + "_" + text[position+1:]
+			position = position + 1
+
+		# Cut consonant / consonant (ll, mm, ...)
+		p = re.compile(r"([bcdfghjklmnpqrstvwxz])\1([^ ])", re.IGNORECASE)
+		text = p.sub(r"\1-\1\2", text)
+
+		# Cut vowel / consonant vowel
+		start = 0
+		end = 0
+		while start < len(text) - 1:
+			if text[start] in vowel_list and text[start+1] in consonant_list:
+				end = start + 1
+				while end <= len(text) - 1 and text[end] in consonant_list:
+					end = end + 1
+				# A trailing "e " is left alone so a final silent e stays attached.
+				if end <= len(text) - 1 and (text[start+1:end] in consonant_list or text[start+1:end] in consonant_blends) and text[end] in vowel_list and text[end:end+2] != "e ":
+					text = text[:start+1] + "/" + text[start+1:]
+			start = start + 1
+
+		# Cut vowel consonant / consonant+ vowel (trumpet, simple, understanding, ...)
+		start = 0
+		end = 0
+		while start < len(text) - 1:
+			if text[start] in vowel_list and text[start+1] in consonant_list:
+				end = start + 2
+				while end <= len(text) - 1 and text[end] in consonant_list:
+					end = end + 1
+				if end <= len(text) - 1 and end > start + 2 and text[end] in vowel_list:
+					if not (text[start+1:end] in consonant_blends):
+						text = text[:start+2] + "-" + text[start+2:]
+			start = start + 1
+
+		# Return the words splitted
+		return text
+
+	def syllabalize(self, text):
+		"""Detect the language of text and split it with the matching method.
+
+		Returns a list of clusters for "ml_IN" and "hi_IN", the marked-up
+		string of syllabalize_en() for "en_US", and a plain list of the
+		individual characters for any other detected language.
+		"""
+		lang = LangDetect().detect_lang(text)
+		if lang == "ml_IN":
+			return self.syllabalize_ml(text)
+		if lang == "hi_IN":
+			return self.syllabalize_hi(text)
+		if lang == "en_US":
+			return self.syllabalize_en(text)
+		return list(text)
diff --git a/silpa/modules/transliterator/transliterate.py b/silpa/modules/transliterator/transliterate.py
new file mode 100644
index 0000000..6603b27
--- /dev/null
+++ b/silpa/modules/transliterator/transliterate.py
@@ -0,0 +1,93 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Transliterator: convert text between Indic scripts
+# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+from langdetect import LangDetect
+class Transliterator:
+	"""Transliterate text between Indic scripts by shifting Unicode code points.
+
+	The supported scripts occupy consecutive 128 code point blocks with the
+	same internal layout, so a character is converted by adding the distance
+	between the source and the target block.
+	"""
+
+	# Reference code point of each supported language: second code point of
+	# its 128 code point Unicode block.
+	_SCRIPT_BASE = {
+		"hi_IN": 0x0901,
+		"bn_IN": 0x0981,
+		"pa_IN": 0x0A01,
+		"gu_IN": 0x0A81,
+		"or_IN": 0x0B01,
+		"ta_IN": 0x0B81,
+		"te_IN": 0x0C01,
+		"ka_IN": 0x0C81,	# historical code used by this module for Kannada
+		"kn_IN": 0x0C81,	# ISO 639-1 code for Kannada, accepted as an alias
+		"ml_IN": 0x0D01,
+	}
+	# Danda and double danda live in the Devanagari block but are shared by
+	# all these scripts, so they must not be shifted.
+	_SHARED_PUNCTUATION = (0x0964, 0x0965)
+
+	def transliterate(self, text, target_lang_code):
+		"""Return text with every word converted to the script of target_lang_code.
+
+		The text is split on single spaces and the source language is detected
+		per word with LangDetect. Only characters inside the source script's
+		own Unicode block are shifted; punctuation, digits, joiners and any
+		other character are copied unchanged. Every word of the result,
+		including the last one, is followed by a space.
+		"""
+		ld = LangDetect()
+		converted_words = []
+		for word in text.split(" "):
+			# Repeated spaces produce empty words: nothing to detect or convert.
+			if word:
+				word = self._convert_word(word, ld.detect_lang(word), target_lang_code)
+			converted_words.append(word + " ")
+		return "".join(converted_words)
+
+	def _convert_word(self, word, src_lang_code, target_lang_code):
+		"""Shift the source-script characters of one word into the target script."""
+		offset = self.getOffset(src_lang_code, target_lang_code)
+		if offset == 0:
+			return word
+		# A non-zero offset implies src_lang_code is a key of _SCRIPT_BASE.
+		block_start = self._SCRIPT_BASE[src_lang_code] - 1
+		block_end = block_start + 0x7F
+		converted = []
+		for char in word:
+			code = ord(char)
+			if block_start <= code <= block_end and code not in self._SHARED_PUNCTUATION:
+				char = unichr(code + offset)
+			converted.append(char)
+		return "".join(converted)
+
+	def getOffset(self, src, target):
+		"""Return the code point distance from the src script to the target script.
+
+		The result is 0 when either language code is not a supported Indic
+		language (this includes "en_US" and "Unknown"), meaning "leave the
+		text as it is".
+		"""
+		src_base = self._SCRIPT_BASE.get(src)
+		target_base = self._SCRIPT_BASE.get(target)
+		if src_base is None or target_base is None:
+			return 0
+		return target_base - src_base
+if __name__ == "__main__":
+	# Manual check: transliterate a Malayalam sample into Tamil script and print it.
+	print(Transliterator().transliterate(u"കരയുന്നോ  കരയുന്നോ?", "ta_IN"))
author	Santhosh Thottingal <santhosh.thottingal@gmail.com>	2009-03-24 21:53:02 +0530
committer	Santhosh Thottingal <santhosh.thottingal@gmail.com>	2009-03-24 21:53:02 +0530
commit	925ba08ecb3ff12496d343a1a0a99daf9a32ad3d (patch)
tree	91aff87b0818e4e861a0ad2c6843d2286cb3775b /silpa/modules
parent	886b09e5d997af17d1b0a9c7fad6e952a94bed45 (diff)
download	AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.gz AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.tar.xz AnjaliOldLipi.git-925ba08ecb3ff12496d343a1a0a99daf9a32ad3d.zip