diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-23 11:24:05 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-23 11:25:11 +0800 |
commit | 014f1784618f2079dcf2404eb3e0e32dffedce74 (patch) | |
tree | 1d93c7b927a23982798f8510926feabb15de50b1 | |
parent | 670c213f286404ea8925aed000ae0fad59b6abf9 (diff) | |
download | trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.tar.gz trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.tar.xz trainer-014f1784618f2079dcf2404eb3e0e32dffedce74.zip |
write compute*Entropy
-rw-r--r-- | newword.py | 39 |
1 files changed, 39 insertions, 0 deletions
# Added to newword.py directly after computeEntropy(freqs)
# (diff hunk @@ -123,6 +123,45 @@).

# Query: every (prefix, freq) row of the bigram table whose postfix
# equals the bound parameter.
SELECT_PREFIX_DML = '''
SELECT prefix, freq FROM bigram WHERE postfix = ? ;
'''

# Query: every (postfix, freq) row of the bigram table whose prefix
# equals the bound parameter.
SELECT_POSTFIX_DML = '''
SELECT postfix, freq FROM bigram WHERE prefix = ? ;
'''


def _computeNeighbourEntropy(cur, sql, word):
    """Run *sql* bound to *word* and return the entropy of the freq column.

    Shared implementation of computePrefixEntropy and
    computePostfixEntropy, which differ only in the query they run.

    cur  -- object whose execute(sql, params) result offers fetchall()
    sql  -- statement with one '?' placeholder, yielding two-column rows
            whose second column is the frequency
    word -- value bound to the placeholder

    Returns 0. when the query yields no rows; otherwise whatever
    computeEntropy (defined earlier in this file) returns for the list of
    frequencies.
    """
    rows = cur.execute(sql, (word, )).fetchall()
    if not rows:
        return 0.

    # Only the frequency matters here; the neighbouring word is ignored.
    freqs = [freq for (_, freq) in rows]
    # Sanity check: every frequency is expected to be at least 1.
    assert all(freq >= 1 for freq in freqs)

    return computeEntropy(freqs)


def computePrefixEntropy(cur, word):
    """Return the entropy of the freq values of rows whose postfix is *word*.

    Prints a 'prefix' trace line, runs SELECT_PREFIX_DML with *word* and
    feeds the resulting frequencies to computeEntropy.  Returns 0. when
    no row matches.
    """
    print('prefix', word)
    return _computeNeighbourEntropy(cur, SELECT_PREFIX_DML, word)


def computePostfixEntropy(cur, word):
    """Return the entropy of the freq values of rows whose prefix is *word*.

    Prints a 'postfix' trace line, runs SELECT_POSTFIX_DML with *word*
    and feeds the resulting frequencies to computeEntropy.  Returns 0.
    when no row matches.
    """
    print('postfix', word)
    return _computeNeighbourEntropy(cur, SELECT_POSTFIX_DML, word)


############################################################