summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRajeesh K Nambiar <rajeeshknambiar@gmail.com>2009-02-03 11:09:36 +0530
committerRajeesh K Nambiar <rajeeshknambiar@gmail.com>2009-02-03 11:09:36 +0530
commit79879edb7c6f62d2edb12d51e911a5a9c6df4c9e (patch)
tree6b8d77478a21451b0849372c5da8834e61d6b683
parentf771e45350f7a6e155a5ec9446944c0a58ffdcfa (diff)
downloadAnjaliOldLipi.git-79879edb7c6f62d2edb12d51e911a5a9c6df4c9e.tar.gz
AnjaliOldLipi.git-79879edb7c6f62d2edb12d51e911a5a9c6df4c9e.tar.xz
AnjaliOldLipi.git-79879edb7c6f62d2edb12d51e911a5a9c6df4c9e.zip
Payyans: Fix all pre-base, post-base issues when converting to unicode; including 'ൈ', '്യ', '്വ'
-rwxr-xr-xpayyans/payyans/payyan.py68
1 files changed, 48 insertions, 20 deletions
diff --git a/payyans/payyans/payyan.py b/payyans/payyans/payyan.py
index 819b940..afcac65 100755
--- a/payyans/payyans/payyan.py
+++ b/payyans/payyans/payyan.py
@@ -107,30 +107,45 @@ class Payyan:
def word2Unicode(self, ascii_text):
index = 0
+ post_index = 0
prebase_letter = ""
+ postbase_letter = "" # "‌‌്യ", "്വ"
unicode_text = ""
+ next_ucode_letter = ""
self.direction="a2u"
self.rulesDict = self.LoadRules()
while index < len(ascii_text):
- letter = ascii_text[index]
- if letter in self.rulesDict:
- unicode_letter = self.rulesDict[letter]
- else:
- unicode_letter = letter
- if(self.isPrebase(unicode_letter)):
- # "ൈ" എന്നത് ആസ്കിയില്‍ 2 "െ" ചേര്‍ന്നതാണ്. It is unique!
- if prebase_letter.encode('utf-8') == "െ" and unicode_letter.encode('utf-8') == "െ":
- prebase_letter = u"ൈ"
- else:
- prebase_letter = unicode_letter
- else:
- if ((unicode_letter.encode('utf-8') == "എ") | ( unicode_letter.encode('utf-8') == "ഒ" )):
- unicode_text = unicode_text + self.getVowelSign(prebase_letter , unicode_letter)
+ for charNo in [2,1]:
+ letter = ascii_text[index:index+charNo]
+ if letter in self.rulesDict:
+ unicode_letter = self.rulesDict[letter]
+ if(self.isPrebase(unicode_letter)): #സ്വരചിഹ്നമാണോ?
+ prebase_letter = unicode_letter
+ else: #സ്വരചിഹ്നമല്ല
+ #എങ്കില്‍ വ്യഞ്ജനത്തിനു ശേഷം പോസ്റ്റ്-ബേസ് ഉണ്ടോ എന്നു നോക്കൂ
+ post_index = index+charNo
+ if post_index < len(ascii_text):
+ letter = ascii_text[post_index]
+ if letter in self.rulesDict:
+ next_ucode_letter = self.rulesDict[letter]
+ if self.isPostbase(next_ucode_letter):
+ postbase_letter = next_ucode_letter
+ index = index + 1
+ if ((unicode_letter.encode('utf-8') == "എ") |
+ ( unicode_letter.encode('utf-8') == "ഒ" )):
+ unicode_text = unicode_text + postbase_letter + self.getVowelSign(prebase_letter , unicode_letter)
+ else:
+ unicode_text = unicode_text + unicode_letter + postbase_letter + prebase_letter
+ prebase_letter=""
+ postbase_letter=""
+ index = index + charNo
+ break
else:
- unicode_text = unicode_text + unicode_letter + prebase_letter
- prebase_letter=""
-
- index = index + 1
+ if charNo == 1:
+ unicode_text = unicode_text + letter
+ index = index + 1
+ break
+ unicode_letter = letter
return unicode_text # മതം മാറ്റി തിരിച്ചു കൊടുക്ക്വാ !
def Ascii2Uni(self):
@@ -195,8 +210,21 @@ class Payyan:
if( ( unicode_letter == "േ" ) | ( unicode_letter == "ൈ" ) | ( unicode_letter == "ൊ" ) | ( unicode_letter == "ോ" ) | ( unicode_letter == "ൌ" )
| ( unicode_letter == "്ര" ) | ( unicode_letter == "െ" )
):
- return "ഇതു സത്യം... അ...സത്യം.... അസത്യം...!"
-
+ return True #"ഇതു സത്യം... അ...സത്യം.... അസത്യം...!"
+ else:
+ return False
+
+ def isPostbase(self, letter):
+ '''
+ "ക്യ" എന്നതിലെ "്യ", "ക്വ" എന്നതിലെ "്വ" എന്നിവ പോസ്റ്റ്-ബേസ് ആണ്.
+ "ത്യേ" എന്നത് ആസ്കിയില്‍ "ഏ+ത+്യ" എന്നാണ് എഴുതുന്നത്. അപ്പോള്‍ വ്യഞ്ജനം കഴിഞ്ഞ് പോസ്റ്റ്-ബേസ്
+ ഉണ്ടെങ്കില്‍ വ്യഞ്ജനം+പോസ്റ്റ്-ബേസ് കഴിഞ്ഞേ പ്രീ-ബേസ് ചേര്‍ക്കാവൂ! ഹൊ, പയ്യന്‍ പാണിനീശിഷ്യനാണ്!!
+ '''
+ unicode_letter = letter.encode('utf-8')
+ if ( (unicode_letter == "്യ") | (unicode_letter == "്വ") ):
+ return True
+ else:
+ return False
def LoadRules(self):
'''