summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2013-01-23 10:57:56 +0800
committerPeng Wu <alexepico@gmail.com>2013-01-23 10:57:56 +0800
commit670c213f286404ea8925aed000ae0fad59b6abf9 (patch)
treefcf655646524675e5038b5419b5c9ab7bfbdfeb9
parenta4d7e8bd6e562864a67a17372c1a004329af91e4 (diff)
downloadtrainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.gz
trainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.xz
trainer-670c213f286404ea8925aed000ae0fad59b6abf9.zip
write computeEntropy
-rw-r--r-- newword.py 21
1 files changed, 20 insertions, 1 deletions
diff --git a/newword.py b/newword.py
index 434a27f..50ee8aa 100644
--- a/newword.py
+++ b/newword.py
@@ -1,8 +1,9 @@
-#!/usr/bin/sqlite3
+#!/usr/bin/python3
import os
import os.path
import sqlite3
from argparse import ArgumentParser
+from math import log
import utils
from myconfig import MyConfig
from dirwalk import walkIndex
@@ -107,6 +108,24 @@ def populateBigramSqlite(workdir):
############################################################
+# Information Entropy Model #
+############################################################
+
def computeEntropy(freqs):
    """Return the Shannon entropy, in nats, of a set of frequency counts.

    The counts are normalised to probabilities ``p_i = freq_i / sum(freqs)``
    and the entropy is ``-sum(p_i * log(p_i))`` using the natural logarithm.

    Args:
        freqs: iterable of non-negative numbers (raw counts or weights).

    Returns:
        The entropy as a float. Returns 0.0 when there are no observations
        (empty input, or every count is zero).

    Raises:
        ValueError: if any frequency is negative.
    """
    # Diagnostic output retained from the original implementation.
    print(freqs)

    # Materialise once so the values can be traversed more than once even
    # when the caller passes a generator.
    freqs = list(freqs)
    if any(freq < 0 for freq in freqs):
        raise ValueError("frequencies must be non-negative")

    totalfreq = float(sum(freqs))
    if totalfreq == 0:
        # No observations at all: there is no uncertainty to measure.
        entropy = 0.0
    else:
        # Zero counts are skipped: p * log(p) tends to 0 as p tends to 0,
        # whereas log(0) itself raises ValueError.
        # No exact "probabilities sum to 1" check is made: floating-point
        # rounding makes that equality fail for perfectly valid input.
        entropy = sum(
            -(freq / totalfreq) * log(freq / totalfreq)
            for freq in freqs
            if freq > 0
        )

    print(entropy)
    return entropy
+
+
+
+
+############################################################
# Get Threshold Pass #
############################################################