summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-23 11:24:05 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-23 11:25:11 +0800
commit: 014f1784618f2079dcf2404eb3e0e32dffedce74 (patch)
tree: 1d93c7b927a23982798f8510926feabb15de50b1
parent: 670c213f286404ea8925aed000ae0fad59b6abf9 (diff)
download: trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.tar.gz
trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.tar.xz
trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.zip
write compute*Entropy
-rw-r--r-- newword.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/newword.py b/newword.py
index 50ee8aa..05c67aa 100644
--- a/newword.py
+++ b/newword.py
@@ -123,6 +123,45 @@ def computeEntropy(freqs):
return entropy
+# Parameterized query: selects the (prefix, freq) columns of every row in the
+# bigram table whose postfix column equals the single bound parameter.
+SELECT_PREFIX_DML = '''
+SELECT prefix, freq FROM bigram WHERE postfix = ? ;
+'''
+
+# Parameterized query: selects the (postfix, freq) columns of every row in the
+# bigram table whose prefix column equals the single bound parameter.
+SELECT_POSTFIX_DML = '''
+SELECT postfix, freq FROM bigram WHERE prefix = ? ;
+'''
+
+
+def computePrefixEntropy(cur, word):
+    """Return the entropy of the freq values of bigram rows ending in *word*.
+
+    Executes SELECT_PREFIX_DML on ``cur`` with ``word`` bound as the postfix,
+    collects the ``freq`` column of every matching row and returns whatever
+    ``computeEntropy`` yields for that list.  When no row matches, ``0.`` is
+    returned without calling ``computeEntropy``.
+
+    ``cur`` must offer ``execute(sql, params)`` whose result has
+    ``fetchall()`` (presumably a sqlite3 cursor, given the ``?``
+    placeholder -- confirm against the caller).
+
+    Side effect: prints ``'prefix'`` and ``word`` before querying.
+    """
+    print('prefix', word)
+
+    result = cur.execute(SELECT_PREFIX_DML, (word, ))
+    matches = result.fetchall()
+    if not matches:
+        return 0.
+
+    counts = []
+    for _, count in matches:
+        # Each selected row is expected to carry a frequency of at least 1.
+        assert count >= 1
+        counts.append(count)
+
+    return computeEntropy(counts)
+
+
+def computePostfixEntropy(cur, word):
+    """Return the entropy of the freq values of bigram rows starting with *word*.
+
+    Executes SELECT_POSTFIX_DML on ``cur`` with ``word`` bound as the prefix,
+    collects the ``freq`` column of every matching row and returns whatever
+    ``computeEntropy`` yields for that list.  When no row matches, ``0.`` is
+    returned without calling ``computeEntropy``.
+
+    ``cur`` must offer ``execute(sql, params)`` whose result has
+    ``fetchall()`` (presumably a sqlite3 cursor, given the ``?``
+    placeholder -- confirm against the caller).
+
+    Side effect: prints ``'postfix'`` and ``word`` before querying.
+    """
+    print('postfix', word)
+
+    result = cur.execute(SELECT_POSTFIX_DML, (word, ))
+    matches = result.fetchall()
+    if not matches:
+        return 0.
+
+    counts = []
+    for _, count in matches:
+        # Each selected row is expected to carry a frequency of at least 1.
+        assert count >= 1
+        counts.append(count)
+
+    return computeEntropy(counts)
############################################################