summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-07-27 18:56:01 +0800
committer Peng Wu <alexepico@gmail.com> 2011-07-27 18:56:01 +0800
commit c676243f85f22ace3ca18dcb07e79db2b133f21c (patch)
tree 70079abc8c27822509868706e99528f0aee610f5
parent 5b2b30242c8b07b954e7a0bf7b647c2a6e219c47 (diff)
download trainer-c676243f85f22ace3ca18dcb07e79db2b133f21c.tar.gz
trainer-c676243f85f22ace3ca18dcb07e79db2b133f21c.tar.xz
trainer-c676243f85f22ace3ca18dcb07e79db2b133f21c.zip
fixes estimate.py
-rwxr-xr-x estimate.py 20
-rwxr-xr-x generate.py 1
-rw-r--r-- lib/myconfig.py 4
3 files changed, 20 insertions, 5 deletions
diff --git a/estimate.py b/estimate.py
index eaec334..ef5c40a 100755
--- a/estimate.py
+++ b/estimate.py
@@ -22,7 +22,7 @@ def handleError(error):
sys.exit(error)
-def handleOneModel(modelfile):
+def handleOneModel(modelfile, reportfile):
modelfilestatuspath = modelfile + config.getStatusPostfix()
modelfilestatus = utils.load_status(modelfilestatuspath)
if not utils.check_epoch(modelfilestatus, 'Generate'):
@@ -30,6 +30,8 @@ def handleOneModel(modelfile):
if utils.check_epoch(modelfilestatus, 'Estimate'):
return
+ reporthandle = open(reportfile, 'wb')
+
result_line_prefix = "average lambda:"
avg_lambda = 0.
@@ -44,14 +46,21 @@ def handleOneModel(modelfile):
close_fds=True)
for line in subprocess.stdout.readlines():
+ reporthandle.writelines([line])
#remove trailing '\n'
+ line = line.decode('utf-8')
line = line.rstrip(os.linesep)
if line.startswith(result_line_prefix):
avg_lambda = float(line[len(result_line_prefix):])
- os.waitpid(subprocess.pid, 0)
+ reporthandle.close()
+
+ (pid, status) = os.waitpid(subprocess.pid, 0)
+ if status != 0:
+ sys.exit('estimate k mixture model returns error.')
#end processing
+ print('average lambda:', avg_lambda)
modelfilestatus['EstimateScore'] = avg_lambda
utils.sign_epoch(modelfilestatus, 'Estimate')
utils.store_status(modelfilestatuspath, modelfilestatus)
@@ -63,13 +72,16 @@ def walkThroughModels(path):
filepath = os.path.join(root, onefile)
if onefile.endswith(config.getModelPostfix()):
subpath = os.path.relpath(filepath, path)
+ reportfile = filepath + config.getReportPostfix()
print("Processing " + subpath)
- handleOneModel(filepath)
+ handleOneModel(filepath, reportfile)
print("Processed " + subpath)
elif onefile.endswith(config.getStatusPostfix()):
pass
elif onefile.endswith(config.getIndexPostfix()):
pass
+ elif onefile.endswith(config.getReportPostfix()):
+ pass
else:
print('Unexpected file:' + filepath)
@@ -102,6 +114,8 @@ def gatherModels(path, indexname):
pass
elif onefile.endswith(config.getIndexPostfix()):
pass
+ elif onefile.endswith(config.getReportPostfix()):
+ pass
else:
print('Unexpected file:' + filepath)
indexfile.close()
diff --git a/generate.py b/generate.py
index 6f98814..f3cc09c 100755
--- a/generate.py
+++ b/generate.py
@@ -148,6 +148,7 @@ def handleOneIndex(indexpath, subdir, indexname):
#end processing
#save current progress in status file
+ modelnum += 1
indexstatus['GenerateTextEnd'] = nexttextnum
indexstatus['GenerateModelEnd'] = modelnum
diff --git a/lib/myconfig.py b/lib/myconfig.py
index f40d2a4..a5f4eec 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -39,7 +39,7 @@ class MyConfig:
return self.m_evals_dir
def getEstimatesModel(self):
- estimates_model = m_tools_dir + '/data/estimates.db'
+ estimates_model = self.m_tools_dir + '/data/estimates.db'
return estimates_model
def getEstimateIndex(self):
@@ -62,7 +62,7 @@ class MyConfig:
#the trained corpus size of model candidates
def getCandidateModelSize(self):
- candidate_model_size = 11.9 * 1024 * 1024
+ candidate_model_size = 11.9 * 1024 * 1024 * 3
return candidate_model_size
def getModelPostfix(self):