summary | refs | log | tree | commit | diff | stats
path: root/payyans/payyans/payyan.py
diff options
context:
space:
mode:
author: Rajeesh K Nambiar <rajeeshknambiar@gmail.com>, 2009-02-03 08:01:47 +0530
committer: Rajeesh K Nambiar <rajeeshknambiar@gmail.com>, 2009-02-03 08:01:47 +0530
commit: f771e45350f7a6e155a5ec9446944c0a58ffdcfa (patch)
tree: 1cd3808b8de2a18a1dfc48fb6375eefd7ad82d3b /payyans/payyans/payyan.py
parent: 6f02e32253b4a3ca60489ae980771f30ec0dd748 (diff)
download: AnjaliOldLipi.git-f771e45350f7a6e155a5ec9446944c0a58ffdcfa.tar.gz
AnjaliOldLipi.git-f771e45350f7a6e155a5ec9446944c0a58ffdcfa.tar.xz
AnjaliOldLipi.git-f771e45350f7a6e155a5ec9446944c0a58ffdcfa.zip
Payyans: Fix converting 'ൈ' from ASCII
Diffstat (limited to 'payyans/payyans/payyan.py')
-rwxr-xr-x payyans/payyans/payyan.py | 14
1 files changed, 9 insertions, 5 deletions
diff --git a/payyans/payyans/payyan.py b/payyans/payyans/payyan.py
index 0827b8f..819b940 100755
--- a/payyans/payyans/payyan.py
+++ b/payyans/payyans/payyan.py
@@ -1,7 +1,7 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Payyans Ascii to Unicode Convertor
-# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>,
+# Copyright 2008-2009 Santhosh Thottingal <santhosh.thottingal@gmail.com>,
# Nishan Naseer <nishan.naseer@gmail.com>, Manu S Madhav <manusmad@gmail.com>,
# Rajeesh K Nambiar <rajeeshknambiar@gmail.com>
# http://www.smc.org.in
@@ -57,7 +57,7 @@ class Payyan:
letter = unicode_text[index:index+charNo]
if letter in self.rulesDict:
ascii_letter = self.rulesDict[letter]
- letter = letter.encode('utf8')
+ letter = letter.encode('utf-8')
'''കിട്ടിയ അക്ഷരങ്ങളുടെ അപ്പുറത്തും ഇപ്പുറത്തും സ്വരചിഹ്നങ്ങള്‍ ഫിറ്റ് ചെയ്യാനുള്ള ബദ്ധപ്പാട്'''
if letter == 'ൈ': # പിറകില്‍ രണ്ടു സാധനം പിടിപ്പിക്കുക
ascii_text = ascii_text[:-1] + ascii_letter*2 + ascii_text[-1:]
@@ -118,12 +118,16 @@ class Payyan:
else:
unicode_letter = letter
if(self.isPrebase(unicode_letter)):
- prebase_letter = unicode_letter
+ # "ൈ" എന്നത് ആസ്കിയില്‍ 2 "െ" ചേര്‍ന്നതാണ്. It is unique!
+ if prebase_letter.encode('utf-8') == "െ" and unicode_letter.encode('utf-8') == "െ":
+ prebase_letter = u"ൈ"
+ else:
+ prebase_letter = unicode_letter
else:
if ((unicode_letter.encode('utf-8') == "എ") | ( unicode_letter.encode('utf-8') == "ഒ" )):
unicode_text = unicode_text + self.getVowelSign(prebase_letter , unicode_letter)
else:
- unicode_text = unicode_text + unicode_letter+ prebase_letter
+ unicode_text = unicode_text + unicode_letter + prebase_letter
prebase_letter=""
index = index + 1
@@ -140,7 +144,7 @@ class Payyan:
'''ഊഹും. കൊന്നാലും ഇനി മുന്നോട്ടില്ല. മുന്നില്‍ മറ്റവനാകുന്നു. ഏതു്? '''
return 1 # Error - no pdftotext !
else:
- self.input_filename = self.input_filename.split(".") [0]+ ".txt"
+ self.input_filename = os.path.splitext(self.input_filename)[0] + ".txt"
if self.input_filename :
ascii_file = codecs.open(self.input_filename, encoding = 'utf-8', errors = 'ignore')
else :