summary | refs | log | tree | commit | diff | stats
path: root/estimate.py
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-07-23 20:23:40 +0800
committer Peng Wu <alexepico@gmail.com> 2011-07-23 20:23:40 +0800
commit f2cce92f790dbceef5bbbbefb6d6989f17701628 (patch)
tree 72191ccd5f875068b6e352129ea1adfdd6baf3b1 /estimate.py
parent 4f45286b35c94f1bd7e2048a543787f5e9f103bc (diff)
download trainer-f2cce92f790dbceef5bbbbefb6d6989f17701628.tar.gz
trainer-f2cce92f790dbceef5bbbbefb6d6989f17701628.tar.xz
trainer-f2cce92f790dbceef5bbbbefb6d6989f17701628.zip
begin to write estimate.py
Diffstat (limited to 'estimate.py')
-rw-r--r-- estimate.py 89
1 files changed, 84 insertions, 5 deletions
diff --git a/estimate.py b/estimate.py
index c36ee03..e621dcb 100644
--- a/estimate.py
+++ b/estimate.py
@@ -1,19 +1,98 @@
#!/usr/bin/python3
+import os
+import os.path
import sys
+from subprocess import Popen, PIPE
+from argparse import ArgumentParser
+import utils
+from myconfig import MyConfig
+
+
config = MyConfig()

# The estimation tool is started through a relative path
# ('./estimate_k_mixture_model'), so the working directory is switched to
# <tools dir>/utils/training once, at import time.
libpinyin_dir = config.getToolsDir()
libpinyin_sub_dir = os.path.join(libpinyin_dir, 'utils', 'training')
os.chdir(libpinyin_sub_dir)
def handleError(error):
    """Abort the program, using *error* as the exit status/message.

    Passed as the ``onerror`` callback to ``os.walk`` in this file, so a
    directory that cannot be listed terminates the run instead of being
    skipped silently.
    """
    raise SystemExit(error)
def handleOneModel(modelfile):
    """Run the estimator on one model file and record the resulting score.

    The status file at ``modelfile + config.getStatusPostfix()`` is loaded
    first.  If its 'Estimate' epoch is already signed nothing is done;
    otherwise ``./estimate_k_mixture_model`` is run, the value following the
    "average lambda:" prefix in its output is stored under 'EstimateScore',
    the 'Estimate' epoch is signed and the status file is written back.

    Args:
        modelfile: path of the model file to estimate.

    Raises:
        utils.EpochError: if the 'Generate' epoch has not been signed yet.
        ValueError: if the text after "average lambda:" is not a float.
    """
    modelfilestatuspath = modelfile + config.getStatusPostfix()
    modelfilestatus = utils.load_status(modelfilestatuspath)
    if not utils.check_epoch(modelfilestatus, 'Generate'):
        raise utils.EpochError('Please generate first.\n')
    if utils.check_epoch(modelfilestatus, 'Estimate'):
        return

    result_line_prefix = "average lambda:"
    avg_lambda = 0.

    #begin processing
    cmdline = [
        './estimate_k_mixture_model',
        '--deleted-bigram-file', config.getEstimatesModel(),
        '--bigram-file', modelfile,
    ]

    # universal_newlines=True makes the pipe yield str instead of bytes;
    # without it the str-based rstrip()/startswith() below raise TypeError
    # under Python 3.
    proc = Popen(cmdline, shell=False, stdout=PIPE, close_fds=True,
                 universal_newlines=True)
    try:
        for line in proc.stdout:
            #remove trailing newline
            line = line.rstrip('\r\n')
            if line.startswith(result_line_prefix):
                avg_lambda = float(line[len(result_line_prefix):])
    finally:
        # Reap the child through the Popen object (not os.waitpid) so its
        # internal state stays consistent, and release the pipe.
        proc.stdout.close()
        proc.wait()
    # NOTE(review): the exit status is not checked, so a failed run is
    # recorded with a score of 0.0 -- confirm whether that is intended.
    #end processing

    modelfilestatus['EstimateScore'] = avg_lambda
    utils.sign_epoch(modelfilestatus, 'Estimate')
    utils.store_status(modelfilestatuspath, modelfilestatus)
def walkThroughModels(path):
    """Walk the tree under *path* and estimate every model file found.

    Files ending with the model postfix are passed to ``handleOneModel``;
    files ending with the status or index postfix are ignored; anything else
    is reported on stdout.  Walk errors go to ``handleError``.
    """
    for dirpath, _subdirs, filenames in os.walk(path, topdown=True,
                                                onerror=handleError):
        for name in filenames:
            fullpath = os.path.join(dirpath, name)
            if name.endswith(config.getModelPostfix()):
                handleOneModel(fullpath)
                continue
            # status and index files are expected here; nothing to do
            if name.endswith(config.getStatusPostfix()):
                continue
            if name.endswith(config.getIndexPostfix()):
                continue
            print('Unexpected file:' + fullpath)
def gatherModels(path, indexname):
    """Write an index of all estimated models under *path* to *indexname*.

    One record per model file is written, one per line, in the form
    ``<subdir>#<model file name>#<EstimateScore>`` where ``<subdir>`` is the
    model's directory relative to *path*.  Status and index files are
    ignored; any other file is reported on stdout.

    Args:
        path: root directory to walk.
        indexname: path of the index file to (over)write.

    Raises:
        utils.EpochError: if a model's status has no signed 'Estimate' epoch
            or lacks the 'EstimateScore' entry.
    """
    # 'with' guarantees the index file is closed even when a model without a
    # valid estimate aborts the walk.
    with open(indexname, "w") as indexfile:
        for root, _dirs, files in os.walk(path, topdown=True,
                                          onerror=handleError):
            for onefile in files:
                filepath = os.path.join(root, onefile)
                if onefile.endswith(config.getModelPostfix()):
                    #append one record to index file
                    subdir = os.path.relpath(root, path)
                    statusfilepath = filepath + config.getStatusPostfix()
                    status = utils.load_status(statusfilepath)
                    if not (utils.check_epoch(status, 'Estimate') and
                            'EstimateScore' in status):
                        raise utils.EpochError('Unknown Error:\n' +
                                               'Try re-run estimate.\n')
                    avg_lambda = status['EstimateScore']
                    # str(): the score is stored as a float and cannot be
                    # concatenated to a str directly.  The explicit newline
                    # keeps each record on its own line.
                    line = subdir + '#' + onefile + '#' + str(avg_lambda)
                    indexfile.write(line + '\n')
                    #record written
                elif onefile.endswith(config.getStatusPostfix()):
                    pass
                elif onefile.endswith(config.getIndexPostfix()):
                    pass
                else:
                    print('Unexpected file:' + filepath)
def sortModels(indexfilename, sortedindexfilename):
    """Placeholder: sorting of the index file is not implemented yet.

    Both arguments are currently ignored and nothing is read or written.
    """
    return None
if __name__ == '__main__':