summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--newword.py21
1 files changed, 20 insertions, 1 deletions
diff --git a/newword.py b/newword.py
index 434a27f..50ee8aa 100644
--- a/newword.py
+++ b/newword.py
@@ -1,8 +1,9 @@
-#!/usr/bin/sqlite3
+#!/usr/bin/python3
import os
import os.path
import sqlite3
from argparse import ArgumentParser
+from math import log
import utils
from myconfig import MyConfig
from dirwalk import walkIndex
@@ -107,6 +108,24 @@ def populateBigramSqlite(workdir):
############################################################
+# Information Entropy Model #
+############################################################
+
def computeEntropy(freqs):
    """Return the Shannon entropy (natural log, i.e. in nats) of *freqs*.

    *freqs* is an iterable of non-negative occurrence counts (or weights).
    The counts are normalised into probabilities ``p_i = freq_i / total``
    and the result is ``sum(-p_i * log(p_i))``.

    Zero counts contribute nothing to the entropy (the usual convention
    ``0 * log(0) == 0``).  If there are no observations at all (empty
    input or every count is zero) the entropy is reported as ``0.0``.

    Raises:
        ValueError: if any frequency is negative.
    """
    # Materialise once so generators work and we can iterate twice.
    freqs = list(freqs)

    if any(freq < 0 for freq in freqs):
        raise ValueError("frequencies must be non-negative")

    totalfreq = float(sum(freqs))
    if totalfreq == 0:
        # Nothing observed: avoid dividing by zero, report no uncertainty.
        return 0.0

    # Skip zero counts: log(0) is undefined, and their term is 0 anyway.
    probs = [freq / totalfreq for freq in freqs if freq > 0]

    # NOTE: no exact "sum(probs) == 1" check here -- floating-point
    # rounding makes that comparison fail for perfectly valid input
    # (e.g. ten equal counts sum to 0.9999999999999999).
    return sum(-prob * log(prob) for prob in probs)
+
+
+
+
+############################################################
# Get Threshold Pass #
############################################################