diff options
author | Peng Wu <alexepico@gmail.com> | 2011-07-27 16:56:15 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-07-27 16:56:15 +0800 |
commit | 5b2b30242c8b07b954e7a0bf7b647c2a6e219c47 (patch) | |
tree | ddd1285134e61bd8636fdef7790cb1558e401e8f /generate.py | |
parent | b3c2761198d3a50dd1d34eab4eafea7f18650554 (diff) | |
download | trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.tar.gz trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.tar.xz trainer-5b2b30242c8b07b954e7a0bf7b647c2a6e219c47.zip |
Fixes the model size calculation in generate.py
Diffstat (limited to 'generate.py')
-rwxr-xr-x | generate.py | 9 |
1 file changed, 5 insertions, 4 deletions
diff --git a/generate.py b/generate.py index ae66d79..6f98814 100755 --- a/generate.py +++ b/generate.py @@ -28,7 +28,7 @@ def generateOneText(infile, modelfile, reportfile): if not utils.check_epoch(infilestatus, 'Segment'): raise utils.EpochError('Please segment first.\n') if utils.check_epoch(infilestatus, 'Generate'): - return + return False #begin processing cmdline = ['./gen_k_mixture_model', '--maximum-occurs-allowed', \ @@ -55,6 +55,7 @@ def generateOneText(infile, modelfile, reportfile): utils.sign_epoch(infilestatus, 'Generate') utils.store_status(infilestatuspath, infilestatus) + return True #Note: should check the corpus file size, and skip the too small text file. @@ -77,7 +78,7 @@ def handleOneIndex(indexpath, subdir, indexname): modelstatus['GenerateStart'] = textnum modelstatus['GenerateEnd'] = nexttextnum utils.sign_epoch(modelstatus, 'Generate') - utils.store_status(modelstatuspath, modelstatus) + utils.store_status(modelstatuspath, modelstatus) print(indexpath, subdir, indexname) @@ -114,14 +115,14 @@ def handleOneIndex(indexpath, subdir, indexname): print("Skipping " + title + '#' + textpath) continue - aggmodelsize += infilesize modeldir = os.path.join(config.getModelDir(), subdir, indexname) os.makedirs(modeldir, exist_ok=True) modelfile = os.path.join(modeldir, \ config.getCandidateModelName(modelnum)) reportfile = modelfile + config.getReportPostfix() print("Proccessing " + title + '#' + textpath) - generateOneText(infile, modelfile, reportfile) + if generateOneText(infile, modelfile, reportfile): + aggmodelsize += infilesize print("Processed " + title + '#' + textpath) if aggmodelsize > config.getCandidateModelSize(): nexttextnum = i + 1 |