From 30c1b89bec818490131780389c918f8bcfb7aef6 Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Sun, 24 May 2009 13:04:44 +0530 Subject: Miscellaneous bug fixes, hit counter, static pages --- silpa/common/silparesponse.py | 3 + silpa/doc/credits.html | 14 ++++ silpa/doc/todo.html | 15 ++++ silpa/index.py | 31 ++++---- silpa/modules/dictionary/dictionary.py | 47 ++++++------ silpa/modules/inexactsearch/inexactsearch.py | 102 ++++++++++++++++----------- silpa/modules/syllabalizer/syllabalizer.py | 58 ++++++++++++++- silpa/utils/langdetect.py | 16 ++++- silpa/utils/silpautils.py | 18 +++++ 9 files changed, 226 insertions(+), 78 deletions(-) mode change 100644 => 100755 silpa/common/silparesponse.py create mode 100644 silpa/doc/credits.html create mode 100644 silpa/doc/todo.html mode change 100644 => 100755 silpa/index.py mode change 100644 => 100755 silpa/modules/dictionary/dictionary.py mode change 100644 => 100755 silpa/modules/inexactsearch/inexactsearch.py mode change 100644 => 100755 silpa/modules/syllabalizer/syllabalizer.py diff --git a/silpa/common/silparesponse.py b/silpa/common/silparesponse.py old mode 100644 new mode 100755 index 13cc5ca..5bd8af3 --- a/silpa/common/silparesponse.py +++ b/silpa/common/silparesponse.py @@ -18,6 +18,9 @@ class SilpaResponse: html= "
Home /" html=html+navPath+"
" self.response=self.response.replace("$$SILPA_BREADCRUMB$$",html) + else: + html= "
Home
" + self.response=self.response.replace("$$SILPA_BREADCRUMB$$",html) def setContent(self,value): if(value): self.response=self.response.replace("$$SILPA_CONTENT$$",value) diff --git a/silpa/doc/credits.html b/silpa/doc/credits.html new file mode 100644 index 0000000..cc836ec --- /dev/null +++ b/silpa/doc/credits.html @@ -0,0 +1,14 @@ +

Credits

+Many people contributed in direct and indirect way in the development of silpa. This page attempts to list their names. + diff --git a/silpa/doc/todo.html b/silpa/doc/todo.html new file mode 100644 index 0000000..ece066b --- /dev/null +++ b/silpa/doc/todo.html @@ -0,0 +1,15 @@ +

TODO List

+ diff --git a/silpa/index.py b/silpa/index.py old mode 100644 new mode 100755 index 8a44a15..ccffe09 --- a/silpa/index.py +++ b/silpa/index.py @@ -1,4 +1,4 @@ -#!/home/.laboring/smcweb/bin/python +#!/usr/bin/python # -*- coding: utf-8 -*- from common import * @@ -12,21 +12,28 @@ def index(form): action=form['action'].value else: action=None + handleStats() response=SilpaResponse() if(action): module_manager=ModuleManager() action=action.replace(" ","_") - module_instance = module_manager.getModuleInstance(action) - if(module_instance): - response.setBreadcrumb(module_instance.get_module_name()) - response.setContent(module_instance.process(form)) - response.setErrorMessage(module_instance.get_errormessage()) - response.setSuccessMessage(module_instance.get_successmessage()) - else: - response.setBreadcrumb("Coming Soon") - response.setErrorMessage("Module not available") - response.setContent(None) - response.setSuccessMessage(None) + if action.endswith('.html') or action.endswith('.htm'): + response.setBreadcrumb(None) + response.setContent(getStaticContent(action)) + response.setErrorMessage(None) + response.setSuccessMessage(None) + else: + module_instance = module_manager.getModuleInstance(action) + if(module_instance): + response.setBreadcrumb(module_instance.get_module_name()) + response.setContent(module_instance.process(form)) + response.setErrorMessage(module_instance.get_errormessage()) + response.setSuccessMessage(module_instance.get_successmessage()) + else: + response.setBreadcrumb("Coming Soon") + response.setErrorMessage("Module not available") + response.setContent(None) + response.setSuccessMessage(None) else: #index module_manager=ModuleManager() response.setBreadcrumb("Welcome") diff --git a/silpa/modules/dictionary/dictionary.py b/silpa/modules/dictionary/dictionary.py old mode 100644 new mode 100755 index 4de0f76..927c06c --- a/silpa/modules/dictionary/dictionary.py +++ b/silpa/modules/dictionary/dictionary.py @@ -1,6 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -# English Malayalam Dictionary +# Dictionary # Copyright 2008 Santhosh Thottingal # http://www.smc.org.in # @@ -24,49 +24,56 @@ from common import * import os -import pickle +from dictdlib import DictDB class Dictionary(SilpaModule): - def lookup_en_ml(self, key): - key=key.lower() - self.dictFile=os.path.dirname(__file__) + "/data/dict.dat" - pickled_dict=open(self.dictFile,'r') - self.dictionary=pickle.load(pickled_dict) - meaning="" - if self.dictionary.has_key(key): - meaningList=self.dictionary[key] - for meaning_item in meaningList: - meaning=meaning+meaning_item.strip() +"
" - else : - meaning="No Meaning found" - return meaning.decode('utf-8') + def getdef(self, word, dictionary): + dict_dir=os.path.join(os.path.dirname(__file__), 'dictionaries') + dictdata=dict_dir+ "/"+dictionary + dict=DictDB(dictdata) + meanings = dict.getdef(word) + meaningstring= "" + if (meanings==None): + meaningstring = "No definition found" + return meaningstring + for meaning in meanings: + meaningstring += meaning + return meaningstring.decode("utf-8") def process(self,form): response = """ -

English Malayalam Dictionary

+

Dictionary

Enter the word to lookup in the dictionary

- +

+ Word : + Dictionary : +

+

""" if(form.has_key('word')): search_key = form['word'].value + dictionary = form['dictionary'].value response=response % search_key response = response+"

Search Results

" if(search_key==None): response = response+ "Enter a word to find meaning." else: - response = response+ self.lookup_en_ml(search_key) + response = response+ "
 "+ self.getdef(search_key,dictionary) + "
" else: response=response % "" return response def get_module_name(self): - return "English Malayalam Dictionary" + return "Dictionary" def get_info(self): - return "English Malayalam Dictionary. Dictionary is compiled by Kerala state IT Mission" + return "Bilingual Dictionaries" def getInstance(): return Dictionary() diff --git a/silpa/modules/inexactsearch/inexactsearch.py b/silpa/modules/inexactsearch/inexactsearch.py old mode 100644 new mode 100755 index 0d1f35b..67e47d9 --- a/silpa/modules/inexactsearch/inexactsearch.py +++ b/silpa/modules/inexactsearch/inexactsearch.py @@ -1,6 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -# Paralperu +# Approximate Search # Copyright 2008 Santhosh Thottingal # http://www.smc.org.in # @@ -28,32 +28,11 @@ from common import * class ApproximateSearch(SilpaModule): - def syllabalize_ml(self, text): - signs = [ - u'\u0d02', u'\u0d03', u'\u0d3e', u'\u0d3f', u'\u0d40', u'\u0d41', - u'\u0d42', u'\u0d43', u'\u0d44', u'\u0d46', u'\u0d47', u'\u0d48', - u'\u0d4a', u'\u0d4b', u'\u0d4c', u'\u0d4d'] - limiters = ['.','\"','\'','`','!',';',',','?'] - - chandrakkala = u'\u0d4d' - lst_chars = [] - for char in text: - if char in limiters: - lst_chars.append(char) - elif char in signs: - lst_chars[-1] = lst_chars[-1] + char - else: - try: - if lst_chars[-1][-1] == chandrakkala: - lst_chars[-1] = lst_chars[-1] + char - else: - lst_chars.append(char) - except IndexError: - lst_chars.append(char) - - return lst_chars - - + def syllabalize(self, text): + mm=ModuleManager() + syllabalizer = mm.getModuleInstance("Syllabalize") + return syllabalizer.syllabalize(text) + def bigram_search(self, str1, str2, syllable_search=False): """Return approximate string comparator measure (between 0.0 and 1.0) using bigrams. @@ -84,10 +63,19 @@ class ApproximateSearch(SilpaModule): # Make a list of bigrams for both strings - - - - - - - - - - - - - - - - - - # - for i in range(1,len(str1)): - bigr1.append(str1[i-1:i+1]) - for i in range(1,len(str2)): - bigr2.append(str2[i-1:i+1]) + if(syllable_search): + str1_syllables = self. syllabalize(str1) + str2_syllables = self. syllabalize(str2) + for i in range(1,len(str1_syllables)): + bigr1.append(str1_syllables[i-1:i+1]) + for i in range(1,len(str2_syllables)): + bigr2.append(str2_syllables[i-1:i+1]) + else: + for i in range(1,len(str1)): + bigr1.append(str1[i-1:i+1]) + for i in range(1,len(str2)): + bigr2.append(str2[i-1:i+1]) + # Compute average number of bigrams - - - - - - - - - - - - - - - - - - - - - # @@ -105,11 +93,22 @@ class ApproximateSearch(SilpaModule): else: short_bigr = bigr2 long_bigr = bigr1 - - for b in short_bigr: - if (b in long_bigr): - common += 1.0 - long_bigr[long_bigr.index(b)] = [] # Mark this bigram as counted + if(syllable_search): + for b in short_bigr: + if (b in long_bigr): + if long_bigr.index(b) == short_bigr.index(b) : + common += 1.0 + else: + dislocation=(long_bigr.index(b) - short_bigr.index(b))/ average + if dislocation < 0 : + dislocation = dislocation * -1 + common += 1.0 - dislocation + long_bigr[long_bigr.index(b)] = [] # Mark this bigram as counted + else: + for b in short_bigr: + if (b in long_bigr): + common += 1.0 + long_bigr[long_bigr.index(b)] = [] # Mark this bigram as counted w = common / average if(w>=0.6): @@ -139,30 +138,47 @@ class ApproximateSearch(SilpaModule):

- - +

+ Search : + Algorithm :
+ + + +

""" + algorithm = 'sb' + if(form.has_key('algorithm')): + algorithm = form['algorithm'].value if(form.has_key('input_text')): text = action=form['input_text'].value .decode('utf-8') if(form.has_key('search_key')): - key = action=form['search_key'].value .decode('utf-8') - response=response % (text,key) + key =form['search_key'].value .decode('utf-8') + response=response % (text,key,algorithm) words=text.split(" ") response = response+"

Search Results

" response = response+"

Words in green are with exact match. Words in Yellow are with approximate Match." response = response+" Move your mouse pointer over the words to get more information on matching.

" else: response = response+ "Enter a string to search." - return response % (text,"") + return response % (text,"", algorithm) for word in words: word=word.strip() if(word>""): - response = response+ self.bigram_search(word, key) + if word[0]>'0' and word[0]<'Z': + response = response+ self.bigram_search(word, key,False) + else: + if algorithm == 'sb': + response = response+ self.bigram_search(word, key, True) + else: + response = response+ self.bigram_search(word, key, False) response = response+ "
 
" else: - response=response % ("","") + response=response % ("","","sb") return response def get_module_name(self): return "Approximate Search" diff --git a/silpa/modules/syllabalizer/syllabalizer.py b/silpa/modules/syllabalizer/syllabalizer.py old mode 100644 new mode 100755 index 706ee77..39c140f --- a/silpa/modules/syllabalizer/syllabalizer.py +++ b/silpa/modules/syllabalizer/syllabalizer.py @@ -51,6 +51,54 @@ class Syllabalizer(SilpaModule): lst_chars.append(char) return lst_chars + def syllabalize_kn(self,text): + signs = [ + u'\u0c82', u'\u0c83', u'\u0cbd', u'\u0cbe', u'\u0cbf', u'\u0cc0', u'\u0cc1', + u'\u0cc2', u'\u0cc3', u'\u0cc4', u'\u0cc6', u'\u0cc7', u'\u0cc8', + u'\u0cca', u'\u0ccb', u'\u0ccc', u'\u0ccd'] + limiters = ['.','\"','\'','`','!',';',',','?'] + + halant = u'\u0ccd' + lst_chars = [] + for char in text: + if char in limiters: + lst_chars.append(char) + elif char in signs: + lst_chars[-1] = lst_chars[-1] + char + else: + try: + if lst_chars[-1][-1] == halant: + lst_chars[-1] = lst_chars[-1] + char + else: + lst_chars.append(char) + except IndexError: + lst_chars.append(char) + + return lst_chars + def syllabalize_bn(self,text): + signs = [ + u'\u0981', u'\u0982', u'\u0983', u'\u09bd', u'\u09be', u'\u09bf', u'\u09c0', u'\u09c1', + u'\u09c2', u'\u09c3', u'\u09c4', u'\u09c6', u'\u09c7', u'\u09c8', + u'\u09ca', u'\u09cb', u'\u09cc', u'\u09cd', u'\u09d7'] + limiters = ['.','\"','\'','`','!',';',',','?'] + + halant = u'\u09cd' + lst_chars = [] + for char in text: + if char in limiters: + lst_chars.append(char) + elif char in signs: + lst_chars[-1] = lst_chars[-1] + char + else: + try: + if lst_chars[-1][-1] == halant: + lst_chars[-1] = lst_chars[-1] + char + else: + lst_chars.append(char) + except IndexError: + lst_chars.append(char) + + return lst_chars def syllabalize_hi(self,text): signs = [ u'\u0902', u'\u0903', u'\u093e', u'\u093f', u'\u0940', u'\u0941', @@ -176,11 +224,19 @@ class Syllabalizer(SilpaModule): def syllabalize(self,text): mm=ModuleManager() ld = mm.getModuleInstance("Detect Language") - lang=ld.detect_lang(text)[text] + lang = None + try: + lang=ld.detect_lang(text)[text] + except: + pass #FIXME if(lang=="ml_IN"): return self.syllabalize_ml(text) if(lang=="hi_IN"): return self.syllabalize_hi(text) + if(lang=="kn_IN"): + return self.syllabalize_kn(text) + if(lang=="bn_IN"): + return self.syllabalize_bn(text) if(lang=="en_US"): return self.syllabalize_en(text) lst_chars=[] diff --git a/silpa/utils/langdetect.py b/silpa/utils/langdetect.py index 727ea6d..2ed7c6f 100644 --- a/silpa/utils/langdetect.py +++ b/silpa/utils/langdetect.py @@ -21,27 +21,39 @@ class LangDetect(SilpaModule): index = 0 while index < length: letter=word[index] + if not letter.isalpha(): + index=index+1 + continue if ((letter >= u'ം') & (letter <=u'൯')): result_dict[word]= "ml_IN" + break; if ((letter >= u'ঁ') & (letter <= u'৺')): result_dict[word]= "bn_IN" + break if ((letter >= u'ँ') & (letter <= u'ॿ')): result_dict[word]= "hi_IN" + break if ((letter >=u'ઁ') & (letter <= u'૱')): result_dict[word]= "gu_IN" + break if ((letter >= u'ਁ') & (letter <=u'ੴ')): result_dict[word]= "pa_IN" + break if ((letter >= u'ಂ') & (letter <=u'ೲ')): - result_dict[word]= "ka_IN" + result_dict[word]= "kn_IN" + break if ((letter >= u'ଁ') & (letter <= u'ୱ')): result_dict[word]= "or_IN" + break if ((letter >=u'ஂ') & (letter <= u'௺')): result_dict[word]= "ta_IN" + break if ((letter >=u'ఁ') & (letter <= u'౯')): result_dict[word]= "te_IN" + break if ((letter <= u'z')): result_dict[word]= "en_US" - + break index=index+1 word_iter=word_iter+1 return result_dict diff --git a/silpa/utils/silpautils.py b/silpa/utils/silpautils.py index 02556af..25a3df8 100644 --- a/silpa/utils/silpautils.py +++ b/silpa/utils/silpautils.py @@ -14,6 +14,24 @@ def getModulesList(): if(item.startswith("SILPA_ACTION.")): action_dict[item.replace("SILPA_ACTION.","")]=conf_dict[item] return action_dict +def getStaticContent(page): + try: + return open("doc/"+page).read() + except: + return "Could not find the requested page "+ page +def handleStats(): + Hits="0" + try: + InFile = open("count.dat", "r") # Text file with total hits + Hits = InFile.readline() + except: + pass + x = int(Hits) + 1 + h = str(x) + OutFile = open("count.dat", "w") + OutFile.write(str(x)) + OutFile.close() + def loadConfiguration(): conf_dict={} conffile = codecs. open("silpa.conf",encoding='utf-8', errors='ignore') -- cgit