From 1966705ee7edc8296e629ee5bb526864a8428faf Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Tue, 26 Jul 2011 14:14:30 +0800
Subject: write eval rate

---
Review notes (this area between the three-dash line and the diff is not
part of the commit message):

* evaluate.py: the output of the tools is now read as text
  (universal_newlines=True).  The pipe used to yield bytes, so
  rstrip(os.linesep) and startswith() with str arguments raised
  TypeError under python3.
* evaluate.py: the float lambda and the float correction rate are
  converted with str() before they are concatenated to strings.
* evaluate.py: added the missing space in the " doesn't exist." message,
  fixed the "trailing" comment typo and dropped a stray ';'.
* evaluate.py: the three make wrappers share _runMake() and the two tool
  wrappers share _readResult(); command lines and messages are unchanged.
* The blob id after ".." on the index line of evaluate.py still names the
  originally submitted content.
* The indentation of all hunks was restored by hand; please verify the
  context lines against the tree before applying.
* NOTE(review) lib/myconfig.py: m_evals_dir is used as a bare name inside
  a method, like m_tools_dir before it.  Please confirm that it is a
  module-level name; a class attribute would need the self. prefix.

 evaluate.py     | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/myconfig.py |   2 +-
 tryprune.py     |   3 +-
 3 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100755 evaluate.py

diff --git a/evaluate.py b/evaluate.py
new file mode 100755
index 0000000..b46c9fa
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python3
+import os
+import os.path
+import shutil
+import sys
+from subprocess import Popen, PIPE
+from argparse import ArgumentParser
+import utils
+from myconfig import MyConfig
+
+
+#Please `make -f Makefile.data prepare` first
+
+config = MyConfig()
+
+#change cwd to the libpinyin evals tool directory
+libpinyindir = config.getEvalsDir()
+os.chdir(libpinyindir)
+
+#files which checkData() requires in the 'data' subdir
+datafiles = [ 'gb_char.table', 'gbk_char.table', \
+    config.getFinalModelFileName(), 'evals.text', \
+    'deleted_bigram.db']
+
+def checkData():
+    """Exit with an error message if one of datafiles is missing."""
+    datadir = os.path.join(libpinyindir, 'data')
+    for onefile in datafiles:
+        if not os.access(os.path.join(datadir, onefile), os.F_OK):
+            sys.exit('missing one data file:' + onefile)
+
+def _runMake(target, *variables):
+    """Run `make -f Makefile.data target` in the current directory.
+
+    Extra arguments are appended to the make command line as they are.
+    Exits the script when make reports a non-zero status.
+    """
+    cmdline = ['/usr/bin/make', '-f', 'Makefile.data', target]
+    cmdline.extend(variables)
+    process = Popen(cmdline, shell=False, close_fds=True)
+    if process.wait() != 0:
+        sys.exit('make ' + target + ' for data files failed.')
+
+def cleanUpData():
+    """Run the clean target of Makefile.data."""
+    _runMake('clean')
+
+def buildData():
+    """Run the build target of Makefile.data."""
+    _runMake('build')
+
+def _readResult(program, result_line_prefix):
+    """Run program in utils/training and parse the number it prints.
+
+    Returns the float after result_line_prefix from the last output line
+    starting with that prefix, or 0. when no line matches.  The exit
+    status of program is ignored.
+    """
+    result = 0.
+
+    #change to utils/training subdir
+    cwd = os.getcwd()
+    os.chdir(os.path.join(libpinyindir, 'utils', 'training'))
+    try:
+        #universal_newlines gives str lines instead of bytes, which
+        #is required to compare them with the str prefix
+        process = Popen([program], shell=False, stdout=PIPE, \
+            close_fds=True, universal_newlines=True)
+
+        for line in process.stdout:
+            #remove trailing '\n'
+            line = line.rstrip(os.linesep)
+            if line.startswith(result_line_prefix):
+                result = float(line[len(result_line_prefix):])
+
+        process.stdout.close()
+        process.wait()
+    finally:
+        os.chdir(cwd)
+
+    return result
+
+def estimateModel():
+    """Return the average lambda printed by ./estimate_interpolation."""
+    return _readResult('./estimate_interpolation', 'average lambda:')
+
+def modifyCodeforLambda(lambdaparam):
+    """Run the rebuild target with LAMBDA_PARAMETER set to lambdaparam."""
+    #lambdaparam is a float, convert it before the concatenation
+    _runMake('rebuild', 'LAMBDA_PARAMETER=' + str(lambdaparam))
+
+def evaluateModel():
+    """Return the correction rate printed by ./eval_correction_rate."""
+    return _readResult('./eval_correction_rate', 'correction rate:')
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Evaluate correction rate.')
+    parser.add_argument('--finaldir', action='store', \
+        help='final directory', \
+        default=config.getFinalModelDir())
+    parser.add_argument('tryname', action='store', \
+        help='the storage directory')
+
+    args = parser.parse_args()
+    print(args)
+    tryname = 'try' + args.tryname
+
+    trydir = os.path.join(args.finaldir, tryname)
+    if not os.access(trydir, os.F_OK):
+        sys.exit(tryname + " doesn't exist.")
+
+    cwdstatuspath = os.path.join(trydir, config.getFinalStatusFileName())
+    cwdstatus = utils.load_status(cwdstatuspath)
+    if not utils.check_epoch(cwdstatus, 'Prune'):
+        raise utils.EpochError('Please tryprune first.')
+
+    if utils.check_epoch(cwdstatus, 'Evaluate'):
+        sys.exit('already evaluated.')
+
+    modelfile = os.path.join(trydir, config.getFinalModelFileName())
+    destfile = os.path.join(libpinyindir, 'data', \
+        config.getFinalModelFileName())
+    print('copying from ' + modelfile + ' to ' + destfile)
+    shutil.copyfile(modelfile, destfile)
+
+    print('checking')
+    checkData()
+    print('cleaning')
+    cleanUpData()
+    print('building')
+    buildData()
+    print('estimating')
+    avg_lambda = estimateModel()
+
+    cwdstatus['EvaluateAverageLambda'] = avg_lambda
+    utils.store_status(cwdstatuspath, cwdstatus)
+
+    print('rebuilding')
+    modifyCodeforLambda(avg_lambda)
+    print('evaluating')
+    rate = evaluateModel()
+    print(tryname + "'s correction rate:" + str(rate))
+
+    cwdstatus['EvaluateCorrectionRate'] = rate
+    utils.store_status(cwdstatuspath, cwdstatus)
+
+    utils.sign_epoch(cwdstatus, 'Evaluate')
+    utils.store_status(cwdstatuspath, cwdstatus)
+    print('done')
diff --git a/lib/myconfig.py b/lib/myconfig.py
index b39d012..da29bb7 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -49,7 +49,7 @@ class MyConfig:
         return 'estimate.sorted.index'
 
     def getEvalsText(self):
-        evals_text = m_tools_dir + '/data/evals.text'
+        evals_text = m_evals_dir + '/data/evals.text'
         return evals_text
 
     def getMinimumFileSize(self):
diff --git a/tryprune.py b/tryprune.py
index 459e4ed..41a45e9 100755
--- a/tryprune.py
+++ b/tryprune.py
@@ -158,7 +158,7 @@ if __name__ == '__main__':
 
     #check try directory
     if os.access(trydir, os.F_OK):
-        sys.exit('try' + tryname + ' exists.')
+        sys.exit(tryname + ' exists.')
     os.makedirs(trydir)
 
     cwdstatuspath = os.path.join(trydir, config.getFinalStatusFileName())
@@ -201,3 +201,4 @@ if __name__ == '__main__':
     #sign status epoch
     utils.sign_epoch(cwdstatus, 'Prune')
     utils.store_status(cwdstatuspath, cwdstatus)
+    print('done')
-- 
cgit