diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-23 10:57:56 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-23 10:57:56 +0800 |
commit | 670c213f286404ea8925aed000ae0fad59b6abf9 (patch) | |
tree | fcf655646524675e5038b5419b5c9ab7bfbdfeb9 | |
parent | a4d7e8bd6e562864a67a17372c1a004329af91e4 (diff) | |
download | trainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.gz trainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.xz trainer-670c213f286404ea8925aed000ae0fad59b6abf9.zip |
write computeEntropy
-rw-r--r-- | newword.py | 21 |
1 file changed, 20 insertions, 1 deletion
# newword.py -- code recovered from patch hunks that were collapsed onto a
# single line.
#   * Hunk "@@ -1,8 +1,9 @@": the shebang changes from "#!/usr/bin/sqlite3"
#     to "#!/usr/bin/python3", and "from math import log" is added after
#     "from argparse import ArgumentParser".
#   * Hunk "@@ -107,6 +108,24 @@" (context: populateBigramSqlite): adds the
#     "Information Entropy Model" section ahead of "Get Threshold Pass".

############################################################
#                Information Entropy Model                 #
############################################################

def computeEntropy(freqs):
    """Return the Shannon entropy, in nats, of a frequency distribution.

    The raw frequencies are normalised to probabilities
    ``p_i = f_i / sum(freqs)`` and the result is ``-sum(p_i * log(p_i))``
    using the natural logarithm.

    Args:
        freqs: Iterable of non-negative numbers (raw counts or weights).

    Returns:
        The entropy as a float. Zero entries contribute nothing, because
        ``p * log(p)`` tends to 0 as ``p`` tends to 0. An empty or all-zero
        input yields ``0.0``.

    Raises:
        ValueError: If any frequency is negative.
    """
    # Local import keeps this block self-contained; only the math module
    # is needed.
    from math import fsum, log

    # Materialise once: the values are walked several times below, which
    # would silently exhaust a one-shot iterator.
    freqs = list(freqs)
    if any(freq < 0 for freq in freqs):
        raise ValueError("frequencies must be non-negative")

    totalfreq = sum(freqs)
    if not totalfreq:
        # No observations at all: nothing to normalise, report zero entropy
        # instead of dividing by zero.
        return 0.0

    probs = [freq / totalfreq for freq in freqs]

    # There is deliberately no `sum(probs) == 1` check: float rounding makes
    # exact equality unreliable (ten equal counts normalise to a sum of
    # 0.9999999999999999).
    # Filtering on `prob > 0` skips zero counts, and values that underflow
    # to zero, so log() is never called on 0.
    return fsum(-prob * log(prob) for prob in probs if prob > 0)


############################################################
#                Get Threshold Pass                        #
############################################################