diff options
| author | Peng Wu <alexepico@gmail.com> | 2013-01-23 10:57:56 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2013-01-23 10:57:56 +0800 |
| commit | 670c213f286404ea8925aed000ae0fad59b6abf9 (patch) | |
| tree | fcf655646524675e5038b5419b5c9ab7bfbdfeb9 | |
| parent | a4d7e8bd6e562864a67a17372c1a004329af91e4 (diff) | |
| download | trainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.gz trainer-670c213f286404ea8925aed000ae0fad59b6abf9.tar.xz trainer-670c213f286404ea8925aed000ae0fad59b6abf9.zip | |
write computeEntropy
| -rw-r--r-- | newword.py | 21 |
1 files changed, 20 insertions, 1 deletions
```diff
@@ -1,8 +1,9 @@
-#!/usr/bin/sqlite3
+#!/usr/bin/python3
 import os
 import os.path
 import sqlite3
 from argparse import ArgumentParser
+from math import log
 import utils
 from myconfig import MyConfig
 from dirwalk import walkIndex
@@ -107,6 +108,24 @@ def populateBigramSqlite(workdir):
 
 
 ############################################################
+# Information Entropy Model #
+############################################################
+
+def computeEntropy(freqs):
+    print(freqs)
+
+    totalfreq = sum(freqs)
+    freqs = [ freq / float(totalfreq) for freq in freqs ]
+    assert 1 == sum(freqs)
+
+    entropy = sum([ - freq * log(freq) for freq in freqs ])
+    print(entropy)
+    return entropy
+
+
+
+
+############################################################
 # Get Threshold Pass #
 ############################################################
 
```
