From 30c1b89bec818490131780389c918f8bcfb7aef6 Mon Sep 17 00:00:00 2001
From: Santhosh Thottingal <santhosh.thottingal@gmail.com>
Date: Sun, 24 May 2009 13:04:44 +0530
Subject: Miscellaneous bug fixes, hit counter, static pages

---
 silpa/common/silparesponse.py                |   3 +
 silpa/doc/credits.html                       |  14 ++++
 silpa/doc/todo.html                          |  15 ++++
 silpa/index.py                               |  31 ++++----
 silpa/modules/dictionary/dictionary.py       |  47 ++++++------
 silpa/modules/inexactsearch/inexactsearch.py | 102 ++++++++++++++++-----------
 silpa/modules/syllabalizer/syllabalizer.py   |  58 ++++++++++++++-
 silpa/utils/langdetect.py                    |  16 ++++-
 silpa/utils/silpautils.py                    |  18 +++++
 9 files changed, 226 insertions(+), 78 deletions(-)
 mode change 100644 => 100755 silpa/common/silparesponse.py
 create mode 100644 silpa/doc/credits.html
 create mode 100644 silpa/doc/todo.html
 mode change 100644 => 100755 silpa/index.py
 mode change 100644 => 100755 silpa/modules/dictionary/dictionary.py
 mode change 100644 => 100755 silpa/modules/inexactsearch/inexactsearch.py
 mode change 100644 => 100755 silpa/modules/syllabalizer/syllabalizer.py
diff --git a/silpa/common/silparesponse.py b/silpa/common/silparesponse.py
old mode 100644
new mode 100755
index 13cc5ca..5bd8af3
--- a/silpa/common/silparesponse.py
+++ b/silpa/common/silparesponse.py
@@ -18,6 +18,9 @@ class SilpaResponse:
 			html=	"<div id=\"breadcrumb\"><a href=\"http://smc.org.in/silpa\">Home</a> /"
 			html=html+navPath+"</div>"
 			self.response=self.response.replace("$$SILPA_BREADCRUMB$$",html)
+		else:
+			html=	"<div id=\"breadcrumb\"><a href=\"http://smc.org.in/silpa\">Home</a></div>"
+			self.response=self.response.replace("$$SILPA_BREADCRUMB$$",html)
 	def setContent(self,value):
 		if(value):
 			self.response=self.response.replace("$$SILPA_CONTENT$$",value)
diff --git a/silpa/doc/credits.html b/silpa/doc/credits.html
new file mode 100644
index 0000000..cc836ec
--- /dev/null
+++ b/silpa/doc/credits.html
@@ -0,0 +1,14 @@
+<h2>Credits</h2>
+Many people contributed, directly and indirectly, to the development of Silpa. This page attempts to list their names.
+<ul>
+<li>Baiju. M, Swathanthra Malayalam Computing, for his mlsplit program, which serves as the base of the syllabalizer for many languages</li>
+<li>Laxminarayan Kamath for testing and feature suggestions</li>
+<li>Rajeesh Nambiar and Nishan Naseer of SMC for their contributions to the Font converter</li>
+<li>The Guess Language module is based on the Python implementation by Kent S Johnson of guesslanguage.cpp by Jacob R Rideout for KDE
+    http://websvn.kde.org/branches/work/sonnet-refactoring/common/nlp/guesslanguage.cpp?view=markup
+    which itself is based on Language::Guess by Maciej Ceglowski
+    http://languid.cantbedone.org/
+
+</li>
+<li>IT Mission, Kerala government, for providing the English-Malayalam dictionary under the GPL license</li>
+</ul>
diff --git a/silpa/doc/todo.html b/silpa/doc/todo.html
new file mode 100644
index 0000000..ece066b
--- /dev/null
+++ b/silpa/doc/todo.html
@@ -0,0 +1,15 @@
+<h2>TODO List</h2>
+<ul>
+<li>Sakavarsham, Kollavarsham and other Indian calendars</li>
+<li>Spellcheck</li>
+<li>Linguistic sorting</li>
+<li>Stemmer</li>
+<li>OCR</li>
+<li>TTS</li>
+<li>Support more fonts for font converter, .doc and .pdf support</li>
+<li>Anagram for remaining languages</li>
+<li>Random Quote for remaining languages</li>
+<li>Crossword generator and solver for Indian Languages</li>
+<li>More dictionaries</li>
+<li>Web APIs</li>
+</ul>
diff --git a/silpa/index.py b/silpa/index.py
old mode 100644
new mode 100755
index 8a44a15..ccffe09
--- a/silpa/index.py
+++ b/silpa/index.py
@@ -1,4 +1,4 @@
-#!/home/.laboring/smcweb/bin/python
+#!/usr/bin/python
 # -*- coding: utf-8 -*-
 
 from common import *
@@ -12,21 +12,28 @@ def index(form):
 		action=form['action'].value	
 	else:	
 		action=None
+	handleStats()	
 	response=SilpaResponse()
 	if(action):
 		module_manager=ModuleManager()
 		action=action.replace(" ","_")
-		module_instance =  module_manager.getModuleInstance(action)
-		if(module_instance):
-			response.setBreadcrumb(module_instance.get_module_name())
-			response.setContent(module_instance.process(form))
-			response.setErrorMessage(module_instance.get_errormessage())
-			response.setSuccessMessage(module_instance.get_successmessage())
-		else:
-			response.setBreadcrumb("Coming Soon")	
-			response.setErrorMessage("Module not available")	
-			response.setContent(None)
-			response.setSuccessMessage(None)
+		if action.endswith('.html') or action.endswith('.htm'):
+			response.setBreadcrumb(None)
+			response.setContent(getStaticContent(action))
+			response.setErrorMessage(None)	
+			response.setSuccessMessage(None)		
+		else:	
+			module_instance =  module_manager.getModuleInstance(action)
+			if(module_instance):
+				response.setBreadcrumb(module_instance.get_module_name())
+				response.setContent(module_instance.process(form))
+				response.setErrorMessage(module_instance.get_errormessage())
+				response.setSuccessMessage(module_instance.get_successmessage())
+			else:
+				response.setBreadcrumb("Coming Soon")	
+				response.setErrorMessage("Module not available")	
+				response.setContent(None)
+				response.setSuccessMessage(None)
 	else: #index
 		module_manager=ModuleManager()
 		response.setBreadcrumb("Welcome")	
diff --git a/silpa/modules/dictionary/dictionary.py b/silpa/modules/dictionary/dictionary.py
old mode 100644
new mode 100755
index 4de0f76..927c06c
--- a/silpa/modules/dictionary/dictionary.py
+++ b/silpa/modules/dictionary/dictionary.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-# English Malayalam Dictionary
+# Dictionary
 # Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
 # http://www.smc.org.in
 #
@@ -24,49 +24,56 @@
 
 from common import *
 import os
-import pickle
+from dictdlib import DictDB
 class Dictionary(SilpaModule):
 	
-	def lookup_en_ml(self, key):
-		key=key.lower()
-		self.dictFile=os.path.dirname(__file__) + "/data/dict.dat"
-		pickled_dict=open(self.dictFile,'r')
-		self.dictionary=pickle.load(pickled_dict)
-		meaning=""
-		if self.dictionary.has_key(key):
-			meaningList=self.dictionary[key]
-			for meaning_item in meaningList:
-				meaning=meaning+meaning_item.strip()	+"<br/>"
-		else :
-			meaning="No Meaning found"
-		return meaning.decode('utf-8')
+	def getdef(self, word, dictionary):
+		dict_dir=os.path.join(os.path.dirname(__file__), 'dictionaries')
+		dictdata=dict_dir+ "/"+dictionary
+		dict=DictDB(dictdata)
+		meanings =  dict.getdef(word)
+		meaningstring= ""
+		if (meanings==None):
+			meaningstring = "No definition found"
+			return meaningstring
+		for meaning in meanings:
+			meaningstring += meaning
+		return meaningstring.decode("utf-8")
 	def process(self,form):
 		response = """
-		<h2>English Malayalam Dictionary</h2></hr>
+		<h2>Dictionary</h2></hr>
 		<p>Enter the word to lookup in the dictionary
 		</p>
 		<form action="" method="post">
-		<input type="text" value="%s" name="word"/>
+		 <p align="center">
+		Word : <input type="text" value="%s" name="word"/>
+		Dictionary :<select id="dictionary" name="dictionary" style="width:12em;">
+		  <option value="freedict-eng-hin">English-Hindi</option>
+		  <option value="freedict-eng-mal">English-Malayalam</option>
+		</select>
 		<input type="hidden" name="action" value="Dictionary">
+		</br>
 		<input  type="submit" id="Find_Meaning" value="Find Meaning"  style="width:12em;"/>
 		</br>
+		</p>
 		</form>
 		"""
 		if(form.has_key('word')):
 			search_key = form['word'].value
+			dictionary =  form['dictionary'].value
 			response=response % search_key
 			response = response+"<h2>Search Results</h2></hr>"
 			if(search_key==None):
 				response = response+ "Enter a word to find meaning."
 			else:		
-				response = response+ self.lookup_en_ml(search_key)
+				response = response+ "<pre> "+ self.getdef(search_key,dictionary) + "</pre> "
 		else:
 			response=response % ""	
 		return response
 	def get_module_name(self):
-		return "English Malayalam Dictionary"
+		return "Dictionary"
 	def get_info(self):
-		return 	"English Malayalam Dictionary. Dictionary is compiled by Kerala state IT Mission"	
+		return 	"Bilingual Dictionaries"	
 		
 def getInstance():
 	return Dictionary()
diff --git a/silpa/modules/inexactsearch/inexactsearch.py b/silpa/modules/inexactsearch/inexactsearch.py
old mode 100644
new mode 100755
index 0d1f35b..67e47d9
--- a/silpa/modules/inexactsearch/inexactsearch.py
+++ b/silpa/modules/inexactsearch/inexactsearch.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-# Paralperu
+# Approximate Search
 # Copyright 2008 Santhosh Thottingal <santhosh.thottingal@gmail.com>
 # http://www.smc.org.in
 #
@@ -28,32 +28,11 @@ from common import *
 
 class ApproximateSearch(SilpaModule):
 	
-	def syllabalize_ml(self, text):
-		signs = [
-		u'\u0d02', u'\u0d03', u'\u0d3e', u'\u0d3f', u'\u0d40', u'\u0d41',
-		u'\u0d42', u'\u0d43', u'\u0d44', u'\u0d46', u'\u0d47', u'\u0d48',
-		u'\u0d4a', u'\u0d4b', u'\u0d4c', u'\u0d4d']
-		limiters = ['.','\"','\'','`','!',';',',','?']
-
-		chandrakkala = u'\u0d4d'
-		lst_chars = []
-		for char in text:
-			if char in limiters:
-				lst_chars.append(char)
-			elif char in signs:
-				lst_chars[-1] = lst_chars[-1] + char
-			else:
-				try:
-					if lst_chars[-1][-1] == chandrakkala:
-						lst_chars[-1] = lst_chars[-1] + char
-					else:
-						lst_chars.append(char)
-				except IndexError:
-					lst_chars.append(char)
-
-		return lst_chars
-
-
+	def syllabalize(self, text):
+		mm=ModuleManager()
+		syllabalizer = mm.getModuleInstance("Syllabalize")
+		return syllabalizer.syllabalize(text)
+		
 	def bigram_search(self, str1, str2, syllable_search=False):
 		"""Return approximate string comparator measure (between 0.0 and 1.0)
 		using bigrams.
@@ -84,10 +63,19 @@ class ApproximateSearch(SilpaModule):
 
 		# Make a list of bigrams for both strings - - - - - - - - - - - - - - - - - -
 		#
-		for i in range(1,len(str1)):
-			bigr1.append(str1[i-1:i+1])
-		for i in range(1,len(str2)):
-			bigr2.append(str2[i-1:i+1])
+		if(syllable_search):
+			str1_syllables = self. syllabalize(str1)
+			str2_syllables = self. syllabalize(str2)
+			for i in range(1,len(str1_syllables)):
+				bigr1.append(str1_syllables[i-1:i+1])
+			for i in range(1,len(str2_syllables)):
+				bigr2.append(str2_syllables[i-1:i+1])
+		else:	
+			for i in range(1,len(str1)):
+				bigr1.append(str1[i-1:i+1])
+			for i in range(1,len(str2)):
+				bigr2.append(str2[i-1:i+1])
+
 
 		# Compute average number of bigrams - - - - - - - - - - - - - - - - - - - - -
 		#
@@ -105,11 +93,22 @@ class ApproximateSearch(SilpaModule):
 		else:
 			short_bigr = bigr2
 			long_bigr  = bigr1
-
-		for b in short_bigr:
-			if (b in long_bigr):
-				common += 1.0
-				long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
+		if(syllable_search):
+			for b in short_bigr:
+				if (b in long_bigr):
+					if long_bigr.index(b) == short_bigr.index(b) :
+						common += 1.0
+					else:
+						dislocation=(long_bigr.index(b) - short_bigr.index(b))/ average
+						if dislocation < 0 :
+							dislocation = dislocation * -1
+						common += 1.0 - dislocation
+					long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
+		else:
+			for b in short_bigr:
+				if (b in long_bigr):
+					common += 1.0
+					long_bigr[long_bigr.index(b)] = []  # Mark this bigram as counted
 
 		w = common / average
 		if(w>=0.6):
@@ -139,30 +138,47 @@ class ApproximateSearch(SilpaModule):
 		<form action="" method="post">
 		<textarea cols='100' rows='25' name='input_text' id='input_text'>%s</textarea>
 		<br/>
-		<input type="text" name="search_key" value="%s"/>
-		<input  type="submit" id="Hyphenate" value="Approximate Search"  name="action" style="width:12em;"/>
+		<p align="center">
+		Search :<input type="text" name="search_key" value="%s"/>
+		Algorithm : <select id="algorithm" name="algorithm"  value="%s" style="width:12em;">
+		  <option value="sb">Syllable Bigram</option>
+		  <option value="lb">Letter Bigram</option>
+		</select>
 		</br>
+		<input type="hidden" name="action" value="Approximate Search">
+		
+		<input  type="submit" id="ApproximateSearch" value="Search" style="width:12em;"/>
+		</p>
 		</form>
 		"""
+		algorithm = 'sb'	
+		if(form.has_key('algorithm')):		
+				algorithm = form['algorithm'].value
 		if(form.has_key('input_text')):
 			text = action=form['input_text'].value	.decode('utf-8')
 			if(form.has_key('search_key')):	
-				key = action=form['search_key'].value	.decode('utf-8')
-				response=response % (text,key)
+				key =form['search_key'].value	.decode('utf-8')
+				response=response % (text,key,algorithm)
 				words=text.split(" ")
 				response = response+"<h2>Search Results</h2></hr>"
 				response = response+"<p>Words in green are with exact match. Words in Yellow are with approximate Match."
 				response = response+" Move your mouse pointer over the words to get more information on matching.</p></hr>"
 			else:
 				response = response+ "Enter a string to search."
-				return response % (text,"")
+				return response % (text,"", algorithm)
 			for word in words:
 				word=word.strip()
 				if(word>""):
-					response = response+ self.bigram_search(word, key)
+					if word[0]>'0' and word[0]<'Z':
+						response = response+ self.bigram_search(word, key,False)
+					else:	
+						if algorithm == 'sb':
+							response = response+ self.bigram_search(word, key, True)
+						else:
+							response = response+ self.bigram_search(word, key, False)	
 					response = response+ "<div  style='float: left;'>&nbsp;</div>"
 		else:
-			response=response % ("","")	
+			response=response % ("","","sb")	
 		return response
 	def get_module_name(self):
 		return "Approximate Search"
diff --git a/silpa/modules/syllabalizer/syllabalizer.py b/silpa/modules/syllabalizer/syllabalizer.py
old mode 100644
new mode 100755
index 706ee77..39c140f
--- a/silpa/modules/syllabalizer/syllabalizer.py
+++ b/silpa/modules/syllabalizer/syllabalizer.py
@@ -51,6 +51,54 @@ class Syllabalizer(SilpaModule):
 					lst_chars.append(char)
 
 		return lst_chars
+	def syllabalize_kn(self,text):
+		signs = [
+		u'\u0c82', u'\u0c83', u'\u0cbd', u'\u0cbe', u'\u0cbf', u'\u0cc0', u'\u0cc1',
+		u'\u0cc2', u'\u0cc3', u'\u0cc4', u'\u0cc6', u'\u0cc7', u'\u0cc8',
+		u'\u0cca', u'\u0ccb', u'\u0ccc', u'\u0ccd']
+		limiters = ['.','\"','\'','`','!',';',',','?']
+
+		halant = u'\u0ccd'
+		lst_chars = []
+		for char in text:
+			if char in limiters:
+				lst_chars.append(char)
+			elif char in signs:
+				lst_chars[-1] = lst_chars[-1] + char
+			else:
+				try:
+					if lst_chars[-1][-1] == halant:
+						lst_chars[-1] = lst_chars[-1] + char
+					else:
+						lst_chars.append(char)
+				except IndexError:
+					lst_chars.append(char)
+
+		return lst_chars	
+	def syllabalize_bn(self,text):
+		signs = [
+		u'\u0981', u'\u0982', u'\u0983', u'\u09bd', u'\u09be', u'\u09bf', u'\u09c0', u'\u09c1',
+		u'\u09c2', u'\u09c3', u'\u09c4', u'\u09c6', u'\u09c7', u'\u09c8',
+		u'\u09ca', u'\u09cb', u'\u09cc', u'\u09cd', u'\u09d7']
+		limiters = ['.','\"','\'','`','!',';',',','?']
+
+		halant = u'\u09cd'
+		lst_chars = []
+		for char in text:
+			if char in limiters:
+				lst_chars.append(char)
+			elif char in signs:
+				lst_chars[-1] = lst_chars[-1] + char
+			else:
+				try:
+					if lst_chars[-1][-1] == halant:
+						lst_chars[-1] = lst_chars[-1] + char
+					else:
+						lst_chars.append(char)
+				except IndexError:
+					lst_chars.append(char)
+
+		return lst_chars		
 	def syllabalize_hi(self,text):
 		signs = [
 		u'\u0902', u'\u0903', u'\u093e', u'\u093f', u'\u0940', u'\u0941',
@@ -176,11 +224,19 @@ class Syllabalizer(SilpaModule):
 	def syllabalize(self,text):
 		mm=ModuleManager()
 		ld = mm.getModuleInstance("Detect Language")
-		lang=ld.detect_lang(text)[text]
+		lang = None
+		try:
+			lang=ld.detect_lang(text)[text]
+		except:
+			pass #FIXME	
 		if(lang=="ml_IN"):
 			return self.syllabalize_ml(text)
 		if(lang=="hi_IN"):
 			return self.syllabalize_hi(text)
+		if(lang=="kn_IN"):
+			return self.syllabalize_kn(text)	
+		if(lang=="bn_IN"):
+			return self.syllabalize_bn(text)		
 		if(lang=="en_US"):
 			return self.syllabalize_en(text)
 		lst_chars=[]
diff --git a/silpa/utils/langdetect.py b/silpa/utils/langdetect.py
index 727ea6d..2ed7c6f 100644
--- a/silpa/utils/langdetect.py
+++ b/silpa/utils/langdetect.py
@@ -21,27 +21,39 @@ class LangDetect(SilpaModule):
 				index = 0
 				while index < length:
 					letter=word[index]
+					if not letter.isalpha():
+						index=index+1	
+						continue
 					if ((letter >= u'ം') &  (letter <=u'൯')):
 						result_dict[word]= "ml_IN"
+						break;
 					if ((letter >= u'ঁ') &  (letter <= u'৺')):
 						result_dict[word]= "bn_IN"
+						break
 					if ((letter >= u'ँ') &  (letter <= u'ॿ')):
 						result_dict[word]= "hi_IN"
+						break
 					if ((letter >=u'ઁ') &  (letter <= u'૱')):
 						result_dict[word]= "gu_IN"
+						break
 					if ((letter >= u'ਁ') &  (letter <=u'ੴ')):
 						result_dict[word]= "pa_IN"
+						break
 					if ((letter >= u'ಂ') &  (letter <=u'ೲ')):
-						result_dict[word]= "ka_IN"
+						result_dict[word]= "kn_IN"
+						break
 					if ((letter >= u'ଁ') &  (letter <= u'ୱ')):
 						result_dict[word]= "or_IN"
+						break
 					if ((letter >=u'ஂ') &  (letter <= u'௺')):
 						result_dict[word]= "ta_IN"
+						break
 					if ((letter >=u'ఁ') &  (letter <= u'౯')):
 						result_dict[word]= "te_IN"
+						break
 					if ((letter <= u'z')):
 						result_dict[word]= "en_US"
-						
+						break
 					index=index+1	
 			word_iter=word_iter+1	
 		return result_dict
diff --git a/silpa/utils/silpautils.py b/silpa/utils/silpautils.py
index 02556af..25a3df8 100644
--- a/silpa/utils/silpautils.py
+++ b/silpa/utils/silpautils.py
@@ -14,6 +14,24 @@ def getModulesList():
 		if(item.startswith("SILPA_ACTION.")):
 			action_dict[item.replace("SILPA_ACTION.","")]=conf_dict[item]
 	return 	action_dict	
+def getStaticContent(page):
+	try:
+		return open("doc/"+page).read()
+	except:
+		return "Could not find the requested page "+	page
+def handleStats():
+	Hits="0"	
+	try:
+		InFile = open("count.dat", "r")	# Text file with total hits
+		Hits = InFile.readline()
+	except:
+		pass	
+	x = int(Hits) + 1
+	h = str(x)
+	OutFile = open("count.dat", "w")
+	OutFile.write(str(x))
+	OutFile.close()
+
 def loadConfiguration():
 	conf_dict={}
 	conffile = codecs. open("silpa.conf",encoding='utf-8', errors='ignore')
-- 
cgit