12 files changed, 259 insertions, 33 deletions
diff --git a/silpa/common/modulemanager.py b/silpa/common/modulemanager.py
index a3e9f1b..c6185dd 100644
--- a/silpa/common/modulemanager.py
+++ b/silpa/common/modulemanager.py
@@ -35,15 +35,15 @@ class ModuleManager:
 	def getModulesInfoAsHTML(self):
 		module_dict=getModulesList	()
 		response = "<h2>Available Modules</h2></hr>"
-		response = response+"<table class=\"table1\"><tr><th>Module</th><th>Description</th><th>Status</th></tr>"
+		response = response+"<table class=\"table1\"><tr><th>Module</th><th>Description</th></tr>"
 		for action in 	module_dict:
 			module_instance=self.getModuleInstance(action)
 			if(module_instance!=None):
-				response = response+"<tr><td><a href='?action="+ action +"'>"+module_instance.get_module_name()+"</a></td>"
-				response = response+"<td>"+module_instance.get_info()+"</td><td>OK</td></tr>"
+				response = response+"<tr><td><a href='"+ action +"'>"+module_instance.get_module_name()+"</a></td>"
+				response = response+"<td>"+module_instance.get_info()+"</td></tr>"
 			else:
 				response = response+"<tr><td>"+action.replace("_"," ")+"</td>"
-				response = response+"<td>Error while retrieving module details</td><td>Fail</td></tr>"	
+				response = response+"<td>Error while retrieving module details</td></tr>"	
 		return  response+"</table>"	
 if __name__ == '__main__':
 	mm=ModuleManager()
diff --git a/silpa/modules/__init__.py b/silpa/modules/__init__.py
index 4fe98d1..78f7b19 100644
--- a/silpa/modules/__init__.py
+++ b/silpa/modules/__init__.py
@@ -10,4 +10,5 @@ from fortune import *
 from inexactsearch import *
 from dictionary import *
 from anagram import *
+from normalizer import *
 
diff --git a/silpa/modules/dictionary/dictionary.py b/silpa/modules/dictionary/dictionary.py
index 7ba877a..4de0f76 100644
--- a/silpa/modules/dictionary/dictionary.py
+++ b/silpa/modules/dictionary/dictionary.py
@@ -28,6 +28,7 @@ import pickle
 class Dictionary(SilpaModule):
 	
 	def lookup_en_ml(self, key):
+		key=key.lower()
 		self.dictFile=os.path.dirname(__file__) + "/data/dict.dat"
 		pickled_dict=open(self.dictFile,'r')
 		self.dictionary=pickle.load(pickled_dict)
@@ -44,14 +45,15 @@ class Dictionary(SilpaModule):
 		<h2>English Malayalam Dictionary</h2></hr>
 		<p>Enter the word to lookup in the dictionary
 		</p>
-		<form action="" method="get">
+		<form action="" method="post">
 		<input type="text" value="%s" name="word"/>
-		<input  type="submit" id="Find_Meaning" value="Find Meaning"  name="action" style="width:12em;"/>
+		<input type="hidden" name="action" value="Dictionary">
+		<input  type="submit" id="Find_Meaning" value="Find Meaning"  style="width:12em;"/>
 		</br>
 		</form>
 		"""
 		if(form.has_key('word')):
-			search_key = form['word'].value.decode('utf-8')
+			search_key = form['word'].value
 			response=response % search_key
 			response = response+"<h2>Search Results</h2></hr>"
 			if(search_key==None):
diff --git a/silpa/modules/fortune/fortune.py b/silpa/modules/fortune/fortune.py
index cf31aad..bc83b51 100644
--- a/silpa/modules/fortune/fortune.py
+++ b/silpa/modules/fortune/fortune.py
@@ -1,7 +1,7 @@
 # Fortune
 # -*- coding: utf-8 -*-
 #
-#  Copyright © 2008  Santhosh Thottingal
+#  Copyright © 2009  Santhosh Thottingal <santhosh.thottingal@gmail.com>
 #  Released under the GPLV3+ license
 
 import os,random
@@ -13,24 +13,24 @@ class Fortune(SilpaModule):
 		for line in infile:
 			line=line.decode("utf-8")
 			if line == "%\n":
-				yield result
-				result = []
+				continue
 			else:
 				if(pattern==None):
 					result.append(line)
 				else:
-					if(line.find(pattern)==-1):
+					if(line.find(pattern)>0):
 						result.append(line)		
 		if result:
-			yield result
+			return result
+
 			
-	def fortune_ml(self, word):
-		filename="./modules/fortune/database/fortune-ml"
+	def fortune_ml(self, pattern):
+		filename = os.path.join(os.path.dirname(__file__), 'database/fortune-ml')
 		""" Pick a random fortune from a file """
-		for index, fortune in enumerate(self.fortunes(file(filename),None)):
-			if random.random() < (1.0 / (index+1)):
-				chosen = fortune
-
+		fortunes_list=self.fortunes(file(filename),pattern)
+		chosen=""
+		if fortunes_list:
+			chosen= random.choice(fortunes_list)
 		return "".join(chosen)
 
 	def process(self, form):
@@ -46,9 +46,10 @@ class Fortune(SilpaModule):
 		"""
 		if(form.has_key('input_text')):
 			text = form['input_text'].value	.decode('utf-8')
+			response=response % text
 		else:
-			text=""	
-		response=response % text
+			text= None
+			response=response % ""
 		result = self.fortune_ml(text)
 		response = response+"<h2>Random Quote</h2></hr>"
 		response = response+"<b>"+result+"</b>"
diff --git a/silpa/modules/hyphenator/hyphenator.py b/silpa/modules/hyphenator/hyphenator.py
index f5e8fe2..889aee3 100644
--- a/silpa/modules/hyphenator/hyphenator.py
+++ b/silpa/modules/hyphenator/hyphenator.py
@@ -234,9 +234,8 @@ class Hyphenator(SilpaModule):
 		 You can give the text in any language and even with mixed language
 		</p>
 		<form action="" method="post">
-		<textarea cols='100' rows='25' name='input_text' id='id1'>%s</textarea>
+		<textarea  name='input_text' id='id1'>%s</textarea>
 		<input  type="submit" id="Hyphenate" value="Hyphenate"  name="action" style="width:12em;"/>
-		<input type="reset" value="Clear" style="width:12em;"/>
 		</br>
 		</form>
 		"""
diff --git a/silpa/modules/hyphenator/rules/hyph_bn_IN.dic b/silpa/modules/hyphenator/rules/hyph_bn_IN.dic
index 52acee5..42dca20 100644
--- a/silpa/modules/hyphenator/rules/hyph_bn_IN.dic
+++ b/silpa/modules/hyphenator/rules/hyph_bn_IN.dic
@@ -37,8 +37,9 @@ UTF-8
 ৌ1
 ৗ1
 ্2
-ঃ1
-ং1
+2ঃ1
+2ং1
+2ঁ1
 1ন
 ন্2
 2ন্‍
diff --git a/silpa/modules/hyphenator/rules/hyph_ml_IN.dic b/silpa/modules/hyphenator/rules/hyph_ml_IN.dic
index 44b50b9..36a70f2 100644
--- a/silpa/modules/hyphenator/rules/hyph_ml_IN.dic
+++ b/silpa/modules/hyphenator/rules/hyph_ml_IN.dic
@@ -43,7 +43,7 @@ UTF-8
 ൌ1
 ൗ1
 ്2
-ഃ1
+2ഃ1
 2ം1
 1ന
 ന്2
diff --git a/silpa/modules/inexactsearch/__init__.py b/silpa/modules/inexactsearch/__init__.py
new file mode 100644
index 0000000..d47e9c0
--- /dev/null
+++ b/silpa/modules/inexactsearch/__init__.py
@@ -0,0 +1,4 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+import inexactsearch
+
diff --git a/silpa/modules/inexactsearch/inexactsearch.py b/silpa/modules/inexactsearch/inexactsearch.py
new file mode 100644
index 0000000..0d1f35b
--- /dev/null
+++ b/silpa/modules/inexactsearch/inexactsearch.py
@@ -0,0 +1,173 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Approximate Search
+# Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
+# http://www.smc.org.in
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
+# URL: http://www.smc.org.in
+
+
+import sys
+import re
+from common import *
+
+class ApproximateSearch(SilpaModule):
+	
+	def syllabalize_ml(self, text):
+		signs = [
+		u'\u0d02', u'\u0d03', u'\u0d3e', u'\u0d3f', u'\u0d40', u'\u0d41',
+		u'\u0d42', u'\u0d43', u'\u0d44', u'\u0d46', u'\u0d47', u'\u0d48',
+		u'\u0d4a', u'\u0d4b', u'\u0d4c', u'\u0d4d']
+		limiters = ['.','\"','\'','`','!',';',',','?']
+
+		chandrakkala = u'\u0d4d'
+		lst_chars = []
+		for char in text:
+			if char in limiters:
+				lst_chars.append(char)
+			elif char in signs:
+				lst_chars[-1] = lst_chars[-1] + char
+			else:
+				try:
+					if lst_chars[-1][-1] == chandrakkala:
+						lst_chars[-1] = lst_chars[-1] + char
+					else:
+						lst_chars.append(char)
+				except IndexError:
+					lst_chars.append(char)
+
+		return lst_chars
+
+
+	def bigram_search(self, str1, str2, syllable_search=False):
+		"""Compare str1 with str2 using bigrams and return str1 as HTML markup
+		whose background colour reflects how closely the two strings match.
+		USAGE:
+		html = self.bigram_search(str1, str2)
+
+		ARGUMENTS:
+		str1  The first string
+		str2  The second string
+
+		DESCRIPTION:
+		Bigrams are two-character sub-strings contained in a string. For example,
+		'peter' contains the bigrams: pe,et,te,er.
+
+		This routine counts the number of common bigrams and divides by the
+		average number of bigrams. The resulting weight (0.0 to 1.0) selects the highlight colour of the returned HTML.
+		"""
+
+		# Quick check if the strings are the same - - - - - - - - - - - - - - - - - -
+		#
+		if (str1 == str2):
+			result_string = "<div  style='float: left; background-color: green;' title=\"  Bigram comparator : string1: %s, string2: %s. Exact Match found" % (str1, str2)
+			result_string = result_string + "\">"+str1+ "</div>"
+			return 	result_string
+
+		bigr1 = []
+		bigr2 = []
+
+		# Make a list of bigrams for both strings - - - - - - - - - - - - - - - - - -
+		#
+		for i in range(1,len(str1)):
+			bigr1.append(str1[i-1:i+1])
+		for i in range(1,len(str2)):
+			bigr2.append(str2[i-1:i+1])
+
+		# Compute average number of bigrams - - - - - - - - - - - - - - - - - - - - -
+		#
+		average = (len(bigr1)+len(bigr2)) / 2.0
+		if (average == 0.0):
+			return str1
+
+		# Get common bigrams  - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+		#
+		common = 0.0
+
+		if (len(bigr1) < len(bigr2)):  # Count using the shorter bigram list
+			short_bigr = bigr1
+			long_bigr  = bigr2
+		else:
+			short_bigr = bigr2
+			long_bigr  = bigr1
+
+		for b in short_bigr:
+			if (b in long_bigr):
+				common += 1.0
+				long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
+
+		w = common / average
+		if(w>=0.6):
+			result_string = "<div  style='float: left; background-color: yellow;' title=\"  Bigram comparator string 1: %s, string 2: %s" % (str1, str2)
+		else:
+			if((w>0.4) & (w<0.6)):
+				result_string = "<div  style='float: left; background-color: grey;' title=\"  Bigram comparator string 1: %s, string 2: %s" % (str1, str2)	
+			else:
+				result_string = "<div  style='float: left;' title=\"  Bigram comparator string1: %s, string2: %s" % (str1, str2)	
+		result_string = result_string + "    Number of bigrams in String1: %i" % (len(bigr1))
+		result_string = result_string + "    Number of bigrams in String2: %i" % (len(bigr2))
+		result_string = result_string + "    Average: %i" % (average)
+		result_string = result_string + "    Common: %i" % (common)
+		result_string = result_string + "    Final approximate string weight: " + str(w)
+		result_string = result_string + "\">"+str1+ "</div>"
+		return 	result_string
+	def process(self,form):
+		response = """
+		<h2>Inexact Search</h2></hr>
+		<p>The search performed by search engines on Indic text is not effective.
+		It does not take care of the inflective or agglutinative nature of the language.
+		This application tries to solve that by using an inexact search algorithm based on maximum common bigram algorithm.
+		
+		</p>
+		<p>Enter the text for searching in the below text area.
+		</p>
+		<form action="" method="post">
+		<textarea cols='100' rows='25' name='input_text' id='input_text'>%s</textarea>
+		<br/>
+		<input type="text" name="search_key" value="%s"/>
+		<input  type="submit" id="Hyphenate" value="Approximate Search"  name="action" style="width:12em;"/>
+		</br>
+		</form>
+		"""
+		if(form.has_key('input_text')):
+			text = action=form['input_text'].value	.decode('utf-8')
+			if(form.has_key('search_key')):	
+				key = action=form['search_key'].value	.decode('utf-8')
+				response=response % (text,key)
+				words=text.split(" ")
+				response = response+"<h2>Search Results</h2></hr>"
+				response = response+"<p>Words in green are with exact match. Words in Yellow are with approximate Match."
+				response = response+" Move your mouse pointer over the words to get more information on matching.</p></hr>"
+			else:
+				response = response+ "Enter a string to search."
+				return response % (text,"")
+			for word in words:
+				word=word.strip()
+				if(word>""):
+					response = response+ self.bigram_search(word, key)
+					response = response+ "<div  style='float: left;'>&nbsp;</div>"
+		else:
+			response=response % ("","")	
+		return response
+	def get_module_name(self):
+		return "Approximate Search"
+	def get_info(self):
+		return 	"Approximate Search for a string in the given text. Based on bigram search algorithm"	
+		
+def getInstance():
+	return ApproximateSearch()
diff --git a/silpa/modules/payyans/payyans.py b/silpa/modules/payyans/payyans.py
index 1702307..8751347 100644
--- a/silpa/modules/payyans/payyans.py
+++ b/silpa/modules/payyans/payyans.py
@@ -273,7 +273,7 @@ class Payyans(SilpaModule):
 		<p>Enter the text for detecting the language in the below text area.
 		</p>
 		<form action="" method="post">
-		<textarea cols='100' rows='25' name='input_text' id='id1'>%s</textarea><br/>
+		<textarea  name='input_text' id='id1'>%s</textarea><br/>
 		Select Font : <select id="font" name="%s" style="width:12em;">
 		<option value="karthika">Karthika</option>
 		<option value="bhavana">Bhavana</option>
@@ -282,7 +282,6 @@ class Payyans(SilpaModule):
 		<option value="manorama">Manorama</option>
 		</select>
 		<input  type="submit" id="Convert To Unicode" value="%s"  name="action" style="width:12em;"/>
-		<input type="reset" value="Clear" style="width:12em;"/>
 		</br>
 		</form>
 		"""
diff --git a/silpa/modules/transliterator/transliterate.py b/silpa/modules/transliterator/transliterate.py
index 206a6bf..1b534cf 100644
--- a/silpa/modules/transliterator/transliterate.py
+++ b/silpa/modules/transliterator/transliterate.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-# Paralperu
+# Any Indian Language to any other Indian language transliterator
 # Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
 # http://www.smc.org.in
 #
@@ -23,6 +23,48 @@
 
 from common import *
 class Transliterator(SilpaModule):
+	def transliterate_ml_en(self, word):
+		virama=u"്"
+		#TODO: how to make this more generic so that more languages can be handled here?
+		#idea1: transliterate any language to a common language, say Hindi, and then do the conversion?
+		#existing transliterate.py can be used?
+		#idea2: Have dictionaries for each language like english_xx_dict ?
+		#TODO: complete this
+		english_ml_dict={u'അ':'a',u'ആ':'a',u'ഇ':'a',u'ഈ':'a',u'ഉ':'a',u'ഊ':'a',u'ഋ':'a',\
+				u'എ':'a',u'ഏ':'a',u'ഐ':'a',u'ഒ':'a',u'ഓ':'a',u'ഔ':'a',\
+				u'ക':'k',u'ഖ':'kh',u'ഗ':'g',u'ഘ':'gh',u'ങ്ങ':'ng',\
+				u'ച':'ch',u'ഛ':'chh',u'ജ':'j',u'ഝ':'jhh',u'ഞ':'nj',\
+				u'ട':'t',u'ഠ':'th',u'ഡ':'d',u'ഢ':'dh',u'ണ':'n',\
+				u'ത':'th',u'ഥ':'th',u'ദ':'d',u'ധ':'dh',u'ന':'n',\
+				u'പ':'p',u'ഫ':'ph',u'ബ':'b',u'ഭ':'bh',u'മ':'m',\
+				u'യ':'y',u'ര':'r',u'ല':'l', u'വ':'v', u'റ':'r',\
+				u'ശ':'sa',u'ഷ':'sh',u'സ':'s', u'ഹ':'h',u'ള':'l',u'ഴ':'zh',\
+				u'ാ':'a',u'ി':'i' ,u'ീ':'ee' ,u'ു':'u',\
+				u'ൂ':'uu',u'ൃ':'ri' ,u'െ':'e' ,u'േ':'e',\
+				u'ൈ':'ai',u'ൊ':'o' ,u'ോ':'oo' ,u'ൗ':'au'}
+		word_length	=len(word)
+		index=0
+		tx_string=""
+		while index<word_length:
+			a_vowel=""
+			try:
+				if(index+1<word_length):
+					if(word[index+1]==virama):
+						a_vowel=""		
+				else:
+					if(index+1<word_length):
+						if (english_ml_dict[word[index+1]] in ['a','e','i','o','u']):
+							a_vowel=""				
+					else:	
+						a_vowel="a"		
+					if (english_ml_dict[word[index]] in ['a','e','i','o','u']):	
+						a_vowel=""				
+					tx_string=tx_string+ english_ml_dict[word[index]] + a_vowel
+			except:		
+				tx_string=tx_string+ word[index]
+			index=index+1	
+		return 	tx_string
+
 	def transliterate(self,text, target_lang_code):
 		mm=ModuleManager()
 		ld = mm.getModuleInstance("Detect Language")
@@ -31,7 +73,9 @@ class Transliterator(SilpaModule):
 		for word in words:
 			if(word.strip()>""):
 				src_lang_code=ld.detect_lang(word)[word]
-				tx_str = tx_str
+				if((target_lang_code=="en_US") and (src_lang_code=="ml_IN")):
+					tx_str=tx_str + self.transliterate_ml_en(word)
+					continue	
 				for chr in word:
 					offset=ord(chr) + self.getOffset(src_lang_code, target_lang_code) 
 					if(offset>0):
@@ -60,7 +104,7 @@ class Transliterator(SilpaModule):
 		 You can give the text in any language and even with mixed language
 		</p>
 		<form action="" method="post">
-		<textarea cols='100' rows='25' name='input_text' id='id1'>%s</textarea></br> 
+		<textarea  name='input_text' id='id1'>%s</textarea></br> 
 		<select id="trans-lang" name="trans-lang" style="width:12em;">
 		  <option value="hi_IN">Hindi</option>
 		  <option value="ml_IN">Malayalam</option>
@@ -71,6 +115,7 @@ class Transliterator(SilpaModule):
 		  <option value="gu_IN">Gujarai</option>
 		  <option value="pa_IN">Panjabi</option>
 		  <option value="ka_IN">Kannada</option>
+		  <option value="en_US">English</option>
 		</select>
 		<input  type="submit" id="Transliterate" value="Transliterate"  name="action" style="width:12em;"/>
 		<input type="reset" value="Clear" style="width:12em;"/>
diff --git a/silpa/silpa.conf b/silpa/silpa.conf
index 068112c..a70a482 100644
--- a/silpa/silpa.conf
+++ b/silpa/silpa.conf
@@ -1,13 +1,13 @@
 # Silpa configuration file
 # This is a comment
 # Global properties
-SILPA_BASE_URL=http://planet.smc.org.in/exp/silpa/index.py
+SILPA_BASE_URL=http://smc.org.in/silpa
 SILPA_TEMPLATE=./templates/default/silpa.html
 SILPA_SITE_NAME=Silpa
 SILPA_LANGUAGE=Silpa
 SILPA_SITE_ADMIN_NAME=Santhosh
 SILPA_SITE_ADMIN_EMAIL=santhosh@silpa.org
-SILPA_SITE_COPYRIGHT= Copyright 2008-2009. Silpa Team. All rights Reserved.
+SILPA_SITE_COPYRIGHT= Copyright 2008-2009. Silpa Team. All rights Reserved. 
 
 #Silpa Plugin Modules
 #Format: action=module. For eg:
@@ -16,6 +16,7 @@ SILPA_SITE_COPYRIGHT= Copyright 2008-2009. Silpa Team. All rights Reserved.
 #An example entry
 #SILPA_ACTION.Sort=modules.sort #comment at the end of line is also allowed
 SILPA_ACTION.Transliterate=modules.transliterate #transliterator module
+SILPA_ACTION.Normalize=modules.normalizer #normalizer module
 SILPA_ACTION.Stem=modules.stemmer #stemmer module for Indian Languages
 SILPA_ACTION.Detect_Language=utils #This is also a valid module
 SILPA_ACTION.Guess_Language=modules.guess_language #This is also a valid module
@@ -23,7 +24,7 @@ SILPA_ACTION.To_Unicode=modules.payyans #Ascii to Unicode conversion Module
 SILPA_ACTION.To_ASCII=modules.payyans #Unicode to Ascii conversion Module
 SILPA_ACTION.Syllabalize=modules.syllabalizer #Syllabalizer module
 SILPA_ACTION.Hyphenate=modules.hyphenator #Syllabalizer module
-SILPA_ACTION.Find_Meaning=modules.dictionary #English Malayalam Module
+SILPA_ACTION.Dictionary=modules.dictionary #English Malayalam Dictionary Module
 SILPA_ACTION.Fortune=modules.fortune #Syllabalizer module
 SILPA_ACTION.Anagram=modules.anagram #Anagram module
 SILPA_ACTION.Approximate_Search=modules.inexactsearch #Approximate search