summary refs log tree commit diff stats
path: root/generate.py
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-07-27 16:56:15 +0800
committer Peng Wu <alexepico@gmail.com> 2011-07-27 16:56:15 +0800
commit 5b2b30242c8b07b954e7a0bf7b647c2a6e219c47 (patch)
tree ddd1285134e61bd8636fdef7790cb1558e401e8f /generate.py
parent b3c2761198d3a50dd1d34eab4eafea7f18650554 (diff)
download trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.tar.gz
trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.tar.xz
trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.zip
fixes model size calculation generate.py
Diffstat (limited to 'generate.py')
-rwxr-xr-x generate.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/generate.py b/generate.py
index ae66d79..6f98814 100755
--- a/generate.py
+++ b/generate.py
@@ -28,7 +28,7 @@ def generateOneText(infile, modelfile, reportfile):
if not utils.check_epoch(infilestatus, 'Segment'):
raise utils.EpochError('Please segment first.\n')
if utils.check_epoch(infilestatus, 'Generate'):
- return
+ return False
#begin processing
cmdline = ['./gen_k_mixture_model', '--maximum-occurs-allowed', \
@@ -55,6 +55,7 @@ def generateOneText(infile, modelfile, reportfile):
utils.sign_epoch(infilestatus, 'Generate')
utils.store_status(infilestatuspath, infilestatus)
+ return True
#Note: should check the corpus file size, and skip the too small text file.
@@ -77,7 +78,7 @@ def handleOneIndex(indexpath, subdir, indexname):
modelstatus['GenerateStart'] = textnum
modelstatus['GenerateEnd'] = nexttextnum
utils.sign_epoch(modelstatus, 'Generate')
- utils.store_status(modelstatuspath, modelstatus)
+ utils.store_status(modelstatuspath, modelstatus)
print(indexpath, subdir, indexname)
@@ -114,14 +115,14 @@ def handleOneIndex(indexpath, subdir, indexname):
print("Skipping " + title + '#' + textpath)
continue
- aggmodelsize += infilesize
modeldir = os.path.join(config.getModelDir(), subdir, indexname)
os.makedirs(modeldir, exist_ok=True)
modelfile = os.path.join(modeldir, \
config.getCandidateModelName(modelnum))
reportfile = modelfile + config.getReportPostfix()
print("Proccessing " + title + '#' + textpath)
- generateOneText(infile, modelfile, reportfile)
+ if generateOneText(infile, modelfile, reportfile):
+ aggmodelsize += infilesize
print("Processed " + title + '#' + textpath)
if aggmodelsize > config.getCandidateModelSize():
nexttextnum = i + 1