From 1966705ee7edc8296e629ee5bb526864a8428faf Mon Sep 17 00:00:00 2001
From: Peng Wu <alexepico@gmail.com>
Date: Tue, 26 Jul 2011 14:14:30 +0800
Subject: write eval rate

---
 evaluate.py     | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/myconfig.py |   2 +-
 tryprune.py     |   3 +-
 3 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100755 evaluate.py

diff --git a/evaluate.py b/evaluate.py
new file mode 100755
index 0000000..b46c9fa
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,165 @@
+#!/usr/bin/python3
+import os
+import os.path
+import shutil
+import sys
+from subprocess import Popen, PIPE
+from argparse import ArgumentParser
+import utils
+from myconfig import MyConfig
+
+
+# Run `make -f Makefile.data prepare` once before using this script.
+
+config = MyConfig()
+
+# Work from the libpinyin evals tool directory for the rest of the script.
+libpinyindir = config.getEvalsDir()
+os.chdir(libpinyindir)
+
+# Files that checkData() looks for under <libpinyindir>/data.
+datafiles = ['gb_char.table', 'gbk_char.table', config.getFinalModelFileName(),
+             'evals.text', 'deleted_bigram.db']
+
+def checkData():
+    """Exit unless every file in datafiles exists under <libpinyindir>/data."""
+    datadir = os.path.join(libpinyindir, 'data')
+    for onefile in datafiles:
+        # Joined path: no chdir needed, and a missing data dir is reported too.
+        if not os.access(os.path.join(datadir, onefile), os.F_OK):
+            sys.exit('missing one data file:' + onefile)
+
+def cleanUpData():
+    """Run `make -f Makefile.data clean`; exit the script if make fails."""
+    cmdline = ['/usr/bin/make', '-f', 'Makefile.data', 'clean']
+    proc = Popen(cmdline, shell=False, close_fds=True)
+    # Popen.wait() reaps the child and records its return code; a raw
+    # os.waitpid() would leave the Popen object believing it still runs.
+    if proc.wait() != 0:
+        sys.exit('make clean for data files failed.')
+
+def buildData():
+    """Run `make -f Makefile.data build`; exit the script if make fails."""
+    cmdline = ['/usr/bin/make', '-f', 'Makefile.data', 'build']
+    proc = Popen(cmdline, shell=False, close_fds=True)
+    # Popen.wait() reaps the child and records its return code; a raw
+    # os.waitpid() would leave the Popen object believing it still runs.
+    if proc.wait() != 0:
+        sys.exit('make build for data files failed.')
+
+def estimateModel():
+    """Run ./estimate_interpolation and return the average lambda it reports.
+
+    The tool is run from <libpinyindir>/utils/training and the previous
+    working directory is restored afterwards. Returns 0. if the output
+    has no 'average lambda:' line. The tool's exit status is not checked.
+    """
+    cwd = os.getcwd()
+    os.chdir(os.path.join(libpinyindir, 'utils', 'training'))
+
+    result_line_prefix = "average lambda:"
+    avg_lambda = 0.
+
+    # universal_newlines=True makes the pipe yield str; by default Python 3
+    # yields bytes, and the str-based rstrip/startswith would raise TypeError.
+    proc = Popen(['./estimate_interpolation'], shell=False, stdout=PIPE,
+                 close_fds=True, universal_newlines=True)
+    for line in proc.stdout:
+        line = line.rstrip('\n')
+        if line.startswith(result_line_prefix):
+            avg_lambda = float(line[len(result_line_prefix):])
+    proc.wait()
+
+    os.chdir(cwd)
+    return avg_lambda
+
+def modifyCodeforLambda(lambdaparam):
+    """Run `make ... rebuild` with LAMBDA_PARAMETER=lambdaparam; exit on failure."""
+    # str() is required: the caller passes a float, and str + float raises.
+    cmdline = ['/usr/bin/make', '-f', 'Makefile.data', 'rebuild',
+               'LAMBDA_PARAMETER=' + str(lambdaparam)]
+    proc = Popen(cmdline, shell=False, close_fds=True)
+    # wait() reaps the child through Popen instead of a raw os.waitpid().
+    if proc.wait() != 0:
+        sys.exit('make rebuild for data files failed.')
+
+def evaluateModel():
+    """Run ./eval_correction_rate and return the correction rate it reports.
+
+    The tool is run from <libpinyindir>/utils/training and the previous
+    working directory is restored afterwards. Returns 0. if the output
+    has no 'correction rate:' line. The tool's exit status is not checked.
+    """
+    cwd = os.getcwd()
+    os.chdir(os.path.join(libpinyindir, 'utils', 'training'))
+
+    result_line_prefix = "correction rate:"
+    rate = 0.
+
+    # universal_newlines=True makes the pipe yield str; by default Python 3
+    # yields bytes, and the str-based rstrip/startswith would raise TypeError.
+    proc = Popen(['./eval_correction_rate'], shell=False, stdout=PIPE,
+                 close_fds=True, universal_newlines=True)
+    for line in proc.stdout:
+        line = line.rstrip('\n')
+        if line.startswith(result_line_prefix):
+            rate = float(line[len(result_line_prefix):])
+    proc.wait()
+
+    os.chdir(cwd)
+    return rate
+
+if __name__ == '__main__':
+    # Evaluate the model in <finaldir>/try<tryname> and store the results.
+    parser = ArgumentParser(description='Evaluate correction rate.')
+    parser.add_argument('--finaldir', action='store',
+                        help='final directory',
+                        default=config.getFinalModelDir())
+    parser.add_argument('tryname', action='store', help='the storage directory')
+
+    args = parser.parse_args()
+    print(args)
+    tryname = 'try' + args.tryname
+
+    trydir = os.path.join(args.finaldir, tryname)
+    if not os.access(trydir, os.F_OK):
+        sys.exit(tryname + " doesn't exist.")
+
+    # Evaluation needs the 'Prune' epoch and must not have been run before.
+    cwdstatuspath = os.path.join(trydir, config.getFinalStatusFileName())
+    cwdstatus = utils.load_status(cwdstatuspath)
+    if not utils.check_epoch(cwdstatus, 'Prune'):
+        raise utils.EpochError('Please tryprune first.')
+
+    if utils.check_epoch(cwdstatus, 'Evaluate'):
+        sys.exit('already evaluated.')
+
+    # Copy this try's model file into <libpinyindir>/data.
+    modelfile = os.path.join(trydir, config.getFinalModelFileName())
+    destfile = os.path.join(libpinyindir, 'data',
+                            config.getFinalModelFileName())
+    print('copying from ' + modelfile + ' to ' + destfile)
+    shutil.copyfile(modelfile, destfile)
+
+    print('checking')
+    checkData()
+    print('cleaning')
+    cleanUpData()
+    print('building')
+    buildData()
+    print('estimating')
+    avg_lambda = estimateModel()
+    # Store the lambda in the status file before rebuilding with it.
+    cwdstatus['EvaluateAverageLambda'] = avg_lambda
+    utils.store_status(cwdstatuspath, cwdstatus)
+
+    print('rebuilding')
+    modifyCodeforLambda(avg_lambda)
+    print('evaluating')
+    rate = evaluateModel()
+    # rate is a float, so it must be converted before concatenation.
+    print(tryname + "'s correction rate:" + str(rate))
+    cwdstatus['EvaluateCorrectionRate'] = rate
+    utils.store_status(cwdstatuspath, cwdstatus)
+    # Sign the 'Evaluate' epoch last, after both results are stored.
+    utils.sign_epoch(cwdstatus, 'Evaluate')
+    utils.store_status(cwdstatuspath, cwdstatus)
+    print('done')
diff --git a/lib/myconfig.py b/lib/myconfig.py
index b39d012..da29bb7 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -49,7 +49,7 @@ class MyConfig:
         return 'estimate.sorted.index'
 
     def getEvalsText(self):
-        evals_text = m_tools_dir + '/data/evals.text'
+        evals_text = m_evals_dir + '/data/evals.text'
         return evals_text
 
     def getMinimumFileSize(self):
diff --git a/tryprune.py b/tryprune.py
index 459e4ed..41a45e9 100755
--- a/tryprune.py
+++ b/tryprune.py
@@ -158,7 +158,7 @@ if __name__ == '__main__':
 
     #check try<name> directory
     if os.access(trydir, os.F_OK):
-        sys.exit('try' + tryname + ' exists.')
+        sys.exit(tryname + ' exists.')
 
     os.makedirs(trydir)
     cwdstatuspath = os.path.join(trydir, config.getFinalStatusFileName())
@@ -201,3 +201,4 @@ if __name__ == '__main__':
     #sign status epoch
     utils.sign_epoch(cwdstatus, 'Prune')
     utils.store_status(cwdstatuspath, cwdstatus)
+    print('done')
-- 
cgit