############################################################
#                Information Entropy Model                 #
############################################################

def computeEntropy(freqs):
    """Return the Shannon entropy (natural log, in nats) of raw counts.

    The counts are normalised into a probability distribution
    p_i = freq_i / sum(freqs), and the result is -sum(p_i * log(p_i)).

    Args:
        freqs: iterable of non-negative numeric occurrence counts. It is
            copied into a list first, so one-shot iterators are accepted.

    Returns:
        The entropy as a float. Returns 0.0 when ``freqs`` is empty or all
        counts are zero, since there is no distribution to measure.

    Raises:
        ValueError: if any count is negative.
    """
    # Local import keeps this block self-contained regardless of what the
    # enclosing module imports at file level.
    from math import log

    counts = list(freqs)
    if any(count < 0 for count in counts):
        raise ValueError("frequencies must be non-negative")

    total = sum(counts)
    if not total:
        # Empty input or all-zero counts: avoid dividing by zero.
        return 0.0

    total = float(total)
    # Zero counts are skipped: the limit of p * log(p) as p -> 0 is 0, while
    # log(0) itself raises a math domain error.
    # No exact "probabilities sum to 1" check is made: float rounding makes
    # that comparison fail for valid input (e.g. ten equal counts).
    probabilities = [count / total for count in counts if count]

    return sum(-p * log(p) for p in probabilities)