3 files changed, 143 insertions, 64 deletions
diff --git a/silpa/modules/dictionary/dictionary.py b/silpa/modules/dictionary/dictionary.py
index 4de0f76..927c06c 100644..100755
--- a/silpa/modules/dictionary/dictionary.py
+++ b/silpa/modules/dictionary/dictionary.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-# English Malayalam Dictionary
+# Dictionary
 # Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
 # http://www.smc.org.in
 #
@@ -24,49 +24,56 @@
 
 from common import *
 import os
-import pickle
+from dictdlib import DictDB
 class Dictionary(SilpaModule):
 	
-	def lookup_en_ml(self, key):
-		key=key.lower()
-		self.dictFile=os.path.dirname(__file__) + "/data/dict.dat"
-		pickled_dict=open(self.dictFile,'r')
-		self.dictionary=pickle.load(pickled_dict)
-		meaning=""
-		if self.dictionary.has_key(key):
-			meaningList=self.dictionary[key]
-			for meaning_item in meaningList:
-				meaning=meaning+meaning_item.strip()	+"<br/>"
-		else :
-			meaning="No Meaning found"
-		return meaning.decode('utf-8')
+	def getdef(self, word, dictionary):
+		"""Return the definitions of word from the named dictd database as unicode.
+
+		dictionary is a database base name under ./dictionaries; only its final
+		path component is used so a form value cannot point outside that folder.
+		"""
+		dict_dir = os.path.join(os.path.dirname(__file__), 'dictionaries')
+		database = DictDB(os.path.join(dict_dir, os.path.basename(dictionary)))
+		meanings = database.getdef(word)
+		if not meanings:  # a miss may come back as None or as an empty list
+			return u"No definition found"
+		return "".join(meanings).decode("utf-8")
 	def process(self,form):
 		response = """
-		<h2>English Malayalam Dictionary</h2></hr>
+		<h2>Dictionary</h2></hr>
 		<p>Enter the word to lookup in the dictionary
 		</p>
 		<form action="" method="post">
-		<input type="text" value="%s" name="word"/>
+		 <p align="center">
+		Word : <input type="text" value="%s" name="word"/>
+		Dictionary :<select id="dictionary" name="dictionary" style="width:12em;">
+		  <option value="freedict-eng-hin">English-Hindi</option>
+		  <option value="freedict-eng-mal">English-Malayalam</option>
+		</select>
 		<input type="hidden" name="action" value="Dictionary">
+		</br>
 		<input  type="submit" id="Find_Meaning" value="Find Meaning"  style="width:12em;"/>
 		</br>
+		</p>
 		</form>
 		"""
 		if(form.has_key('word')):
 			search_key = form['word'].value
+			dictionary =  form['dictionary'].value
 			response=response % search_key
 			response = response+"<h2>Search Results</h2></hr>"
 			if(search_key==None):
 				response = response+ "Enter a word to find meaning."
 			else:		
-				response = response+ self.lookup_en_ml(search_key)
+				response = response+ "<pre> "+ self.getdef(search_key,dictionary) + "</pre> "
 		else:
 			response=response % ""	
 		return response
 	def get_module_name(self):
-		return "English Malayalam Dictionary"
+		return "Dictionary"
 	def get_info(self):
-		return 	"English Malayalam Dictionary. Dictionary is compiled by Kerala state IT Mission"	
+		return 	"Bilingual Dictionaries"	
 		
 def getInstance():
 	return Dictionary()
diff --git a/silpa/modules/inexactsearch/inexactsearch.py b/silpa/modules/inexactsearch/inexactsearch.py
index 0d1f35b..67e47d9 100644..100755
--- a/silpa/modules/inexactsearch/inexactsearch.py
+++ b/silpa/modules/inexactsearch/inexactsearch.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-# Paralperu
+# Approximate Search
 # Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
 # http://www.smc.org.in
 #
@@ -28,32 +28,11 @@ from common import *
 
 class ApproximateSearch(SilpaModule):
 	
-	def syllabalize_ml(self, text):
-		signs = [
-		u'\u0d02', u'\u0d03', u'\u0d3e', u'\u0d3f', u'\u0d40', u'\u0d41',
-		u'\u0d42', u'\u0d43', u'\u0d44', u'\u0d46', u'\u0d47', u'\u0d48',
-		u'\u0d4a', u'\u0d4b', u'\u0d4c', u'\u0d4d']
-		limiters = ['.','\"','\'','`','!',';',',','?']
-
-		chandrakkala = u'\u0d4d'
-		lst_chars = []
-		for char in text:
-			if char in limiters:
-				lst_chars.append(char)
-			elif char in signs:
-				lst_chars[-1] = lst_chars[-1] + char
-			else:
-				try:
-					if lst_chars[-1][-1] == chandrakkala:
-						lst_chars[-1] = lst_chars[-1] + char
-					else:
-						lst_chars.append(char)
-				except IndexError:
-					lst_chars.append(char)
-
-		return lst_chars
-
-
+	def syllabalize(self, text):
+		"""Pass text to the module registered as "Syllabalize"; return its result."""
+		splitter = ModuleManager().getModuleInstance("Syllabalize")
+		return splitter.syllabalize(text)
+		
 	def bigram_search(self, str1, str2, syllable_search=False):
 		"""Return approximate string comparator measure (between 0.0 and 1.0)
 		using bigrams.
@@ -84,10 +63,19 @@ class ApproximateSearch(SilpaModule):
 
 		# Make a list of bigrams for both strings - - - - - - - - - - - - - - - - - -
 		#
-		for i in range(1,len(str1)):
-			bigr1.append(str1[i-1:i+1])
-		for i in range(1,len(str2)):
-			bigr2.append(str2[i-1:i+1])
+		if(syllable_search):
+			str1_syllables = self. syllabalize(str1)
+			str2_syllables = self. syllabalize(str2)
+			for i in range(1,len(str1_syllables)):
+				bigr1.append(str1_syllables[i-1:i+1])
+			for i in range(1,len(str2_syllables)):
+				bigr2.append(str2_syllables[i-1:i+1])
+		else:	
+			for i in range(1,len(str1)):
+				bigr1.append(str1[i-1:i+1])
+			for i in range(1,len(str2)):
+				bigr2.append(str2[i-1:i+1])
+
 
 		# Compute average number of bigrams - - - - - - - - - - - - - - - - - - - - -
 		#
@@ -105,11 +93,22 @@ class ApproximateSearch(SilpaModule):
 		else:
 			short_bigr = bigr2
 			long_bigr  = bigr1
-
-		for b in short_bigr:
-			if (b in long_bigr):
-				common += 1.0
-				long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
+		if(syllable_search):
+			for b in short_bigr:
+				if (b in long_bigr):
+					if long_bigr.index(b) == short_bigr.index(b) :
+						common += 1.0
+					else:
+						dislocation=(long_bigr.index(b) - short_bigr.index(b))/ average
+						if dislocation < 0 :
+							dislocation = dislocation * -1
+						common += 1.0 - dislocation
+					long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
+		else:
+			for b in short_bigr:
+				if (b in long_bigr):
+					common += 1.0
+					long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
 
 		w = common / average
 		if(w>=0.6):
@@ -139,30 +138,47 @@ class ApproximateSearch(SilpaModule):
 		<form action="" method="post">
 		<textarea cols='100' rows='25' name='input_text' id='input_text'>%s</textarea>
 		<br/>
-		<input type="text" name="search_key" value="%s"/>
-		<input  type="submit" id="Hyphenate" value="Approximate Search"  name="action" style="width:12em;"/>
+		<p align="center">
+		Search :<input type="text" name="search_key" value="%s"/>
+		Algorithm : <select id="algorithm" name="algorithm"  value="%s" style="width:12em;">
+		  <option value="sb">Syllable Bigram</option>
+		  <option value="lb">Letter Bigram</option>
+		</select>
 		</br>
+		<input type="hidden" name="action" value="Approximate Search">
+		
+		<input  type="submit" id="ApproximateSearch" value="Search" style="width:12em;"/>
+		</p>
 		</form>
 		"""
+		algorithm = 'sb'	
+		if(form.has_key('algorithm')):		
+				algorithm = form['algorithm'].value
 		if(form.has_key('input_text')):
 			text = action=form['input_text'].value	.decode('utf-8')
 			if(form.has_key('search_key')):	
-				key = action=form['search_key'].value	.decode('utf-8')
-				response=response % (text,key)
+				key =form['search_key'].value	.decode('utf-8')
+				response=response % (text,key,algorithm)
 				words=text.split(" ")
 				response = response+"<h2>Search Results</h2></hr>"
 				response = response+"<p>Words in green are with exact match. Words in Yellow are with approximate Match."
 				response = response+" Move your mouse pointer over the words to get more information on matching.</p></hr>"
 			else:
 				response = response+ "Enter a string to search."
-				return response % (text,"")
+				return response % (text,"", algorithm)
 			for word in words:
 				word=word.strip()
 				if(word>""):
-					response = response+ self.bigram_search(word, key)
+					if word[0]>'0' and word[0]<'Z':
+						response = response+ self.bigram_search(word, key,False)
+					else:	
+						if algorithm == 'sb':
+							response = response+ self.bigram_search(word, key, True)
+						else:
+							response = response+ self.bigram_search(word, key, False)	
 					response = response+ "<div  style='float: left;'>&nbsp;</div>"
 		else:
-			response=response % ("","")	
+			response=response % ("","","sb")	
 		return response
 	def get_module_name(self):
 		return "Approximate Search"
diff --git a/silpa/modules/syllabalizer/syllabalizer.py b/silpa/modules/syllabalizer/syllabalizer.py
index 706ee77..39c140f 100644..100755
--- a/silpa/modules/syllabalizer/syllabalizer.py
+++ b/silpa/modules/syllabalizer/syllabalizer.py
@@ -51,6 +51,54 @@ class Syllabalizer(SilpaModule):
 					lst_chars.append(char)
 
 		return lst_chars
+	def syllabalize_kn(self,text):
+		"""Split Kannada unicode text into a list of syllable strings.
+
+		Characters listed in signs (dependent signs and the halant) are glued
+		to the preceding entry, and any character following a halant joins
+		that entry too. Limiters always start an entry of their own. A sign
+		with nothing before it starts a new entry instead of raising IndexError.
+		"""
+		signs = set([
+			u'\u0c82', u'\u0c83', u'\u0cbd', u'\u0cbe', u'\u0cbf', u'\u0cc0', u'\u0cc1', u'\u0cc2', u'\u0cc3',
+			u'\u0cc4', u'\u0cc6', u'\u0cc7', u'\u0cc8', u'\u0cca', u'\u0ccb', u'\u0ccc', u'\u0ccd'])
+		limiters = ['.','\"','\'','`','!',';',',','?']
+		halant = u'\u0ccd'
+		lst_chars = []
+		for char in text:
+			if char in limiters:
+				lst_chars.append(char)
+			elif not lst_chars:
+				lst_chars.append(char)
+			elif char in signs or lst_chars[-1][-1] == halant:
+				lst_chars[-1] += char
+			else:
+				lst_chars.append(char)
+		return lst_chars
+	def syllabalize_bn(self,text):
+		"""Split Bengali unicode text into a list of syllable strings.
+
+		Characters listed in signs (dependent signs and the halant) are glued
+		to the preceding entry, and any character following a halant joins
+		that entry too. Limiters always start an entry of their own. A sign
+		with nothing before it starts a new entry instead of raising IndexError.
+		"""
+		signs = set([
+			u'\u0981', u'\u0982', u'\u0983', u'\u09bd', u'\u09be', u'\u09bf', u'\u09c0', u'\u09c1', u'\u09c2', u'\u09c3',
+			u'\u09c4', u'\u09c6', u'\u09c7', u'\u09c8', u'\u09ca', u'\u09cb', u'\u09cc', u'\u09cd', u'\u09d7'])
+		limiters = ['.','\"','\'','`','!',';',',','?']
+		halant = u'\u09cd'
+		lst_chars = []
+		for char in text:
+			if char in limiters:
+				lst_chars.append(char)
+			elif not lst_chars:
+				lst_chars.append(char)
+			elif char in signs or lst_chars[-1][-1] == halant:
+				lst_chars[-1] += char
+			else:
+				lst_chars.append(char)
+		return lst_chars
 	def syllabalize_hi(self,text):
 		signs = [
 		u'\u0902', u'\u0903', u'\u093e', u'\u093f', u'\u0940', u'\u0941',
@@ -176,11 +224,19 @@ class Syllabalizer(SilpaModule):
 	def syllabalize(self,text):
 		mm=ModuleManager()
 		ld = mm.getModuleInstance("Detect Language")
-		lang=ld.detect_lang(text)[text]
+		lang = None
+		try:
+			lang=ld.detect_lang(text)[text]
+		except:
+			pass #FIXME	
 		if(lang=="ml_IN"):
 			return self.syllabalize_ml(text)
 		if(lang=="hi_IN"):
 			return self.syllabalize_hi(text)
+		if(lang=="kn_IN"):
+			return self.syllabalize_kn(text)	
+		if(lang=="bn_IN"):
+			return self.syllabalize_bn(text)		
 		if(lang=="en_US"):
 			return self.syllabalize_en(text)
 		lst_chars=[]