# Provenance: this text was recovered from a flattened rendering of the patch
#
#   From ee0f42962618bb4d3b607cc18248957c72f18961 Mon Sep 17 00:00:00 2001
#   From: Santhosh Thottingal
#   Date: Tue, 12 May 2009 20:36:02 +0530
#   Subject: Removed the dependency on command line access of dictd daemon
#            by using python dictd library
#   ---
#    bots/dictdlib.py    | 377 ++++++++++++++++++++++++++++++++++++++++++++++++++++
#    bots/eng-mal-bot.py |  53 ++++----
#    2 files changed, 403 insertions(+), 27 deletions(-)
#    create mode 100644 bots/dictdlib.py
#
# The module below is bots/dictdlib.py from that patch.  The patch's hunks
# for bots/eng-mal-bot.py are preserved, commented out, at the end of the file.

# Dictionary creation library
# Copyright (C) 2002 John Goerzen
#
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import gzip
import os
import string
import sys

b64_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
url_headword = "00-database-url"
short_headword = "00-database-short"
info_headword = "00-database-info"


def b64_encode(val):
    """Takes as input an integer val and returns a string of it encoded
    with the base64 algorithm used by dict indexes.

    Six 6-bit digits are produced at most, so only the low 36 bits of val
    are encoded.  Leading zero digits are dropped; zero itself encodes as
    the single digit "A"."""
    retval = ""
    for i in range(5, -1, -1):
        thispart = (val >> (6 * i)) & 0x3F
        # Skip leading zeros: emit once a digit has already been written
        # or the current digit is non-zero.
        if retval or thispart:
            retval += b64_list[thispart]
    return retval or b64_list[0]


def b64_decode(str):
    """Takes as input a string and returns an integer value of it decoded
    with the base64 algorithm used by dict indexes.

    The empty string decodes to 0.  A character outside b64_list raises
    ValueError."""
    retval = 0
    for char in str:
        retval = (retval << 6) | b64_list.index(char)
    return retval


# Characters that survive normalisation in sortnormalize(): letters, digits,
# space and tab.  Kept as a dict because it is a public module-level name.
validdict = dict.fromkeys(string.ascii_letters + string.digits + " \t", 1)


def sortnormalize(x):
    """Returns a value such that x is mapped to a format that sorts properly
    with standard comparison.

    The result is the upper-cased string with every character not in
    validdict removed, a NUL separator, then the full upper-cased string."""
    x2 = "".join([char for char in x if char in validdict])
    return x2.upper() + "\0" + x.upper()


def _cmp(a, b):
    """Three-way comparison; stands in for the Python 2 builtin cmp()."""
    return (a > b) - (a < b)


def sortfunc(x, y):
    """Emulate sort -df.

    x and y are compared on the text before their first NUL and, if that
    ties, on the text between the first and second NUL.  Strings with no
    NUL tie on the second field instead of raising IndexError."""
    xl = x.split("\0")
    yl = y.split("\0")
    ret = _cmp(xl[0], yl[0])
    if ret != 0:
        return ret
    # One-element (or empty) slices compare like the second field itself
    # while tolerating inputs that have no second field.
    return _cmp(xl[1:2], yl[1:2])


class DictDB:
    """Reader/writer for a dictd database: a 'basename.index' file plus a
    'basename.dict' (or gzip-readable 'basename.dict.dz') definitions file.

    The whole index is held in memory in self.indexentries, a dict mapping
    each headword to a list of [start, size] pairs into the dict file."""

    def __init__(self, basename, mode='read', quiet=0):
        """Initialize a DictDB object.

        Mode must be one of:

        read -- read-only access

        write -- write-only access, truncates existing files, does not work
        with .dz.  dict created if nonexistent.

        update -- read/write access, dict created if nonexistent.  Does not
        work with .dz.

        Read can read dict or dict.dz files.  Write and update will NOT work
        with dict.dz files.

        If quiet is nonzero, status messages
        will be suppressed.

        Raises ValueError for an unknown mode, or for 'write' mode when a
        basename.dict.dz file exists; nothing on disk is touched in either
        case."""
        self.mode = mode
        self.quiet = quiet
        self.indexentries = {}
        self.count = 0
        self.basename = basename

        self.indexfilename = self.basename + ".index"
        if os.path.isfile(self.basename + ".dict.dz"):
            self.dictfilename = self.basename + ".dict.dz"
            self.usecompression = 1
        else:
            self.dictfilename = self.basename + ".dict"
            self.usecompression = 0

        # Validate before opening anything: opening the index with "wt"
        # truncates it, so the checks must come first.
        if mode not in ('read', 'write', 'update'):
            raise ValueError("mode must be 'read', 'write', or 'update'")
        if mode == 'write' and self.usecompression:
            raise ValueError("'write' mode incompatible with .dz files")

        # The index is line-oriented text; it is opened in text mode in
        # every mode so the same newline handling applies throughout.
        if mode == 'read':
            self.indexfile = open(self.indexfilename, "rt")
        elif mode == 'write':
            self.indexfile = open(self.indexfilename, "wt")
        else:
            try:
                self.indexfile = open(self.indexfilename, "r+")
            except IOError:
                self.indexfile = open(self.indexfilename, "w+")

        try:
            self.dictfile = self._opendictfile()
        except Exception:
            # Do not leak the index handle when the dict file is unusable.
            self.indexfile.close()
            raise

        if mode != 'write':
            self._initindex()

    def _opendictfile(self):
        """Open and return the definitions file as required by self.mode."""
        if self.mode == 'write':
            return open(self.dictfilename, "wb")
        if self.usecompression:
            # Open it read-only since we don't support mods.
            return gzip.GzipFile(self.dictfilename, "rb")
        if self.mode == 'read':
            return open(self.dictfilename, "rb")
        try:
            return open(self.dictfilename, "r+b")
        except IOError:
            return open(self.dictfilename, "w+b")

    def _initindex(self):
        """Load the entire index off disk into memory.

        Each line is 'word<TAB>start<TAB>size' with start and size in the
        dict base64 encoding.  Blank lines are ignored."""
        self.indexfile.seek(0)
        for line in self.indexfile:
            splits = line.rstrip().split("\t")
            if splits == [""]:
                continue
            self.addindexentry(splits[0],
                               b64_decode(splits[1]),
                               b64_decode(splits[2]))

    def addindexentry(self, word, start, size):
        """Adds an entry to the index.  word is the relevant word.
        start is the starting position in the dictionary and size is the
        size of the definition; both are integers."""
        self.indexentries.setdefault(word, []).append([start, size])

    def delindexentry(self, word, start=None, size=None):
        """Removes an entry from the index; word is the word to search for.

        start and size are optional.  If they are specified, only index
        entries matching the specified values will be removed.

        For instance, if word is "foo" and start and size are not specified,
        all index entries for the word foo will be removed.  If start and size
        are specified, only those entries matching all criteria will be
        removed.

        This function does not actually remove the data from the .dict file.
        Therefore, information removed by this function will still
        exist on-disk in the .dict file, but the dict server will just
        not "see" it -- there will be no way to get to it anymore.

        Returns a count of the deleted entries."""
        entrylist = self.indexentries.get(word)
        if entrylist is None:
            return 0
        kept = [entry for entry in entrylist
                if not ((start is None or start == entry[0]) and
                        (size is None or size == entry[1]))]
        retval = len(entrylist) - len(kept)
        # Mutate in place so existing references to the list stay accurate.
        entrylist[:] = kept
        if not entrylist:       # If we emptied it, del it completely
            del self.indexentries[word]
        return retval

    def update(self, string):
        """Writes string out, if not quiet."""
        if not self.quiet:
            sys.stdout.write(string)
            sys.stdout.flush()

    def seturl(self, url):
        """Sets the URL attribute of this database.  If there was
        already a URL specified, we will use delindexentry() on it
        first."""
        self.delindexentry(url_headword)
        self.addentry(url_headword + "\n     " + url, [url_headword])

    def setshortname(self, shortname):
        """Sets the shortname for this database.  If there was already
        a shortname specified, we will use delindexentry() on it first."""
        self.delindexentry(short_headword)
        self.addentry(short_headword + "\n     " + shortname,
                      [short_headword])

    def setlonginfo(self, longinfo):
        """Sets the extended information for this database.  If there was
        already long info specified, we will use delindexentry() on it
        first."""
        self.delindexentry(info_headword)
        self.addentry(info_headword + "\n" + longinfo, [info_headword])

    def addentry(self, defstr, headwords):
        """Writes an entry.  defstr holds the content of the definition.
        headwords is a list specifying one or more words under which this
        definition should be indexed.  This function always adds \\n
        to the end of defstr.

        Text (non-bytes) definitions are encoded as UTF-8 before being
        written, and the size recorded in the index is the number of bytes
        written."""
        self.dictfile.seek(0, os.SEEK_END)
        start = self.dictfile.tell()
        if not isinstance(defstr, bytes):
            defstr = defstr.encode("utf-8")
        defstr += b"\n"
        self.dictfile.write(defstr)
        for word in headwords:
            self.addindexentry(word, start, len(defstr))
        self.count += 1

        if self.count % 1000 == 0:
            self.update("Processed %d records\r" % self.count)

    def finish(self, dosort=1):
        """Called to finish the writing process.
        **REQUIRED IF OPENED WITH 'update' OR 'write' MODES**.
        This will write the index and close the files.

        dosort is optional and defaults to true.  If set to false,
        dictlib will not sort the index file.  In this case, you
        MUST manually sort it through "sort -df" before it can be used."""
        self.update("Processed %d records.\n" % self.count)

        if dosort:
            self.update("Sorting index: converting")

        indexlist = []
        for word, defs in self.indexentries.items():
            for thisdef in defs:
                indexlist.append("%s\t%s\t%s" % (word,
                                                 b64_encode(thisdef[0]),
                                                 b64_encode(thisdef[1])))

        if dosort:
            self.update(" mapping")
            # Pair every line with its normalised form.  Sorting the pairs
            # orders by the normalised form and breaks ties on the raw
            # line, and keeps identical lines instead of failing on them.
            keyed = [(sortnormalize(entry), entry) for entry in indexlist]

            # Both progress messages are kept so the status output matches
            # what callers have always seen.
            self.update(" listing")
            self.update(" sorting")
            keyed.sort()

            self.update(" re-mapping")
            indexlist = [entry for _, entry in keyed]

            self.update(", done.\n")

        self.update("Writing index...\n")

        try:
            self.indexfile.seek(0)
            for entry in indexlist:
                self.indexfile.write(entry + "\n")

            if self.mode == 'update':
                # In case things were deleted
                self.indexfile.truncate()
        finally:
            self.indexfile.close()
            self.dictfile.close()

        self.update("Complete.\n")

    def getdeflist(self):
        """Returns a list of strings naming all definitions contained
        in this dictionary."""
        return list(self.indexentries.keys())

    def hasdef(self, word):
        """Returns true if word has at least one entry in the index."""
        return word in self.indexentries

    def getdef(self, word):
        """Given a definition name, returns a list of strings with all
        matching definitions.  This is an *exact* match, not a
        case-insensitive one.  Returns [] if word is not in the dictionary.

        Definitions are returned exactly as read from the dict file, which
        is opened in binary mode (so they are bytes objects on Python 3)."""
        retval = []
        if not self.hasdef(word):
            return retval
        for start, length in self.indexentries[word]:
            self.dictfile.seek(start)
            retval.append(self.dictfile.read(length))
        return retval


class DictReader:
    """This object provides compatibility with earlier versions
    of dictdlib.  It is now deprecated."""

    def __init__(self, basename):
        """Initialize a DictReader object.  Provide it with the basename."""
        self.dictdb = DictDB(basename, 'read')

    def getdeflist(self):
        """Returns a list of strings naming all definitions contained
        in this dictionary."""
        return self.dictdb.getdeflist()

    def getdef(self, defname):
        """Given a definition name, returns a list of strings
        with all matching definitions."""
        return self.dictdb.getdef(defname)


class DictWriter:
    """This object provides compatibility with earlier versions
    of dictdlib.  It is now deprecated."""

    def __init__(self, basename, url='unknown', shortname='unknown',
                 longinfo='unknown', quiet=0):
        """Initialize a DictWriter object.  Will create 'basename.dict' and
        'basename.index' files.  url, shortname, and longinfo specify the
        respective attributes of the database.  If quiet is 1,
        status messages are not printed."""
        self.dictdb = DictDB(basename, 'write', quiet)
        self.dictdb.seturl(url)
        self.dictdb.setshortname(shortname)
        self.dictdb.setlonginfo(longinfo)

    def writeentry(self, defstr, headwords):
        """Writes an entry.  defstr holds the content of the definition.
        headwords is a list specifying one or more words under which this
        definition should be indexed.  This function always adds \\n
        to the end of defstr."""
        self.dictdb.addentry(defstr, headwords)

    def finish(self, dosort=1):
        """Called to finish the writing process.  **REQUIRED**.
        This will write the index and close the files.

        dosort is optional and defaults to true.  If set to false,
        dictlib will not sort the index file.  In this case, you
        MUST manually sort it through "sort -df" before it can be used."""
        self.dictdb.finish(dosort)


# ---------------------------------------------------------------------------
# Remainder of the original patch: changes to bots/eng-mal-bot.py.
# Preserved for reference only; it is not part of this module.  The flattened
# source had lost its line breaks and indentation, so the layout below was
# restored by hand and may not match the original whitespace exactly.
# ---------------------------------------------------------------------------
#
# diff --git a/bots/eng-mal-bot.py b/bots/eng-mal-bot.py
# index 3b3ffac..844fe29 100755
# --- a/bots/eng-mal-bot.py
# +++ b/bots/eng-mal-bot.py
# @@ -1,7 +1,7 @@
#  #!/usr/bin/env python
#  # -*- coding: utf-8 -*-
#  #
# -# eng-mal-bot.py A Jabbe buddy bot which provide eng-mal dictionary lookup service
# +# eng-mal-bot.py A Jabber buddy bot which provide eng-mal dictionary lookup service
#  #
#  # Copyright (c) 2009
#  # Santhosh Thottingal
# @@ -23,10 +23,11 @@ import xmpp
#  from xmpp.protocol import *
#  import os
#  import commands
# +from dictdlib import DictDB
#
#  options = {
#      'JID': 'eng.mal.dict@gmail.com',
# -    'Password': '*******',
# +    'Password': 'eng.mal.', #This is fake password.
#  }
#
#  class ConnectionError: pass
# @@ -68,37 +69,35 @@ class Bot:
#          except KeyboardInterrupt:
#              pass
#
# -    def messageHandler(self, conn,mess_node):
# -
# -        if(mess_node.getBody()):
# -            command = "dict --database dict-en-ml '" + mess_node.getBody() +"'"
# -            output = commands.getoutput(command)
# -            if output.find('No definitions found') is not -1:
# -                print "No definitions found"
# -                conn.send( xmpp.Message( mess_node.getFrom(),'No Definitions Found'))
# -            else :
# -                print "definition found"
# -                conn.send( xmpp.Message( mess_node.getFrom() ,output))
# +    def messageHandler(self, conn,message_node):
# +        word = message_node.getBody()
# +        if word :
# +            output = self.getdef(word)
# +            conn.send( xmpp.Message( message_node.getFrom() ,output))
#          raise NodeProcessed # This stanza is fully processed
# -
# -
# +    def getdef(self, word):
# +        en_ml_db = None
# +        try:
# +            #search the dictionary in same directory of program
# +            en_ml_db = DictDB("freedict-eng-mal")
# +        except:
# +            #retry in standard directory of dictd
# +            en_ml_db = DictDB("/usr/share/dictd/freedict-eng-mal")
# +        if en_ml_db == None:
# +            return "[FATAL ERROR] Dictionary not found."
# +        try:
# +            return en_ml_db.getdef(word)[0]
# +        except:
# +            return "No definitions found"
# +
#      def presenceHandler(self, conn, presence):
# -        '''Auto authorizing chat invites'''
#          if presence:
# -            if presence.getType()=='subscribe':
# -                jid = presence.getFrom().getStripped()
# -                self.connection.getRoster().Authorize(jid)
# +            if presence.getType() == 'subscribe':
# +                jabber_id = presence.getFrom().getStripped()
# +                self.connection.getRoster().Authorize(jabber_id)
#
# -        targetJID='node@domain.org'
# -        print presence.getFrom()
# -        if presence.getFrom().bareMatch(targetJID):
# -            # play a sound
# -            pass
# -
# -
# -
#  bot = Bot(**options)
#  bot.loop()
# --
# cgit