diff options
-rw-r--r--  docs/fileformat  |  2 +-
-rwxr-xr-x  generate.py      |  4 ++--
-rw-r--r--  lib/myconfig.py  | 12 ++++++++++++
3 files changed, 15 insertions, 3 deletions
diff --git a/docs/fileformat b/docs/fileformat index d86099d..e147644 100644 --- a/docs/fileformat +++ b/docs/fileformat @@ -45,7 +45,7 @@ Status File Format <sub-directory>#model-candidates-<num>.db#<score> The lines are sorted by <score>. Prune Status Files - 1. 'merged.db', 'pruned.db', 'kmm.text', 'interpolation.text' are generated when running prune tools in 'finals/try<num>' sub-directory. + 1. 'merged.db', 'kmm_merged.text' , 'pruned.db', 'kmm_pruned.text', 'interpolation.text' are generated when running prune tools in 'finals/try<num>' sub-directory. 2. 'prune.status' file are generated also, like: {'PruneEpoch': 4, 'PruneMergeNumber': 1000, 'PruneK':2, 'PruneCDF': 0.6} diff --git a/generate.py b/generate.py index f60904c..9e86ab2 100755 --- a/generate.py +++ b/generate.py @@ -81,7 +81,7 @@ def handleOneIndex(indexpath, subdir, indexname): oneline = oneline.rstrip(os.linesep) (title, textpath) = oneline.split('#') infile = config.getTextDir() + textpath - infilesize = utils.get_file_length(infile) + infilesize = utils.get_file_length(infile + config.getSegmentPostfix()) if infilesize < config.getMinimumFileSize(): print("Skipping " + title + '#' + textpath) continue @@ -117,7 +117,7 @@ def handleOneIndex(indexpath, subdir, indexname): os.unlink(modelfile) if os.access(reportfile, os.F_OK): os.unlink(reportfile) - #save current process in status file + #save current progress in status file indexstatus['GenerateTextEnd'] = nexttextnum indexstatus['GenerateModelEnd'] = modelnum utils.store_status(indexstatuspath, indexstatus) diff --git a/lib/myconfig.py b/lib/myconfig.py index 38a3e53..efed55c 100644 --- a/lib/myconfig.py +++ b/lib/myconfig.py @@ -38,6 +38,18 @@ class MyConfig: def getEvalsDir(self): return self.m_evals_dir + m_estimates_model = \ + '/media/data/Program/trainer/tools/libpinyin/data/estimates.db' + + def getEstimatesModel(self): + return self.m_estimates_model + + m_evals_text = \ + '/media/data/Program/trainer/tools/libpinyin/data/evals.text' 
+ + def getEvalsText(self): + return self.m_evals_text + #about 1,200 Chinese characters m_minimum_chinese_characters = 1200 m_minimum_file_size = m_minimum_chinese_characters * 3 + \ |