summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- docs/fileformat 2
-rwxr-xr-x generate.py 4
-rw-r--r-- lib/myconfig.py 12
3 files changed, 15 insertions, 3 deletions
diff --git a/docs/fileformat b/docs/fileformat
index d86099d..e147644 100644
--- a/docs/fileformat
+++ b/docs/fileformat
@@ -45,7 +45,7 @@ Status File Format
<sub-directory>#model-candidates-<num>.db#<score>
The lines are sorted by <score>.
Prune Status Files
- 1. 'merged.db', 'pruned.db', 'kmm.text', 'interpolation.text' are generated when running prune tools in 'finals/try<num>' sub-directory.
+ 1. 'merged.db', 'kmm_merged.text' , 'pruned.db', 'kmm_pruned.text', 'interpolation.text' are generated when running prune tools in 'finals/try<num>' sub-directory.
2. 'prune.status' file are generated also, like:
{'PruneEpoch': 4, 'PruneMergeNumber': 1000,
'PruneK':2, 'PruneCDF': 0.6}
diff --git a/generate.py b/generate.py
index f60904c..9e86ab2 100755
--- a/generate.py
+++ b/generate.py
@@ -81,7 +81,7 @@ def handleOneIndex(indexpath, subdir, indexname):
oneline = oneline.rstrip(os.linesep)
(title, textpath) = oneline.split('#')
infile = config.getTextDir() + textpath
- infilesize = utils.get_file_length(infile)
+ infilesize = utils.get_file_length(infile + config.getSegmentPostfix())
if infilesize < config.getMinimumFileSize():
print("Skipping " + title + '#' + textpath)
continue
@@ -117,7 +117,7 @@ def handleOneIndex(indexpath, subdir, indexname):
os.unlink(modelfile)
if os.access(reportfile, os.F_OK):
os.unlink(reportfile)
- #save current process in status file
+ #save current progress in status file
indexstatus['GenerateTextEnd'] = nexttextnum
indexstatus['GenerateModelEnd'] = modelnum
utils.store_status(indexstatuspath, indexstatus)
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 38a3e53..efed55c 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -38,6 +38,18 @@ class MyConfig:
def getEvalsDir(self):
return self.m_evals_dir
+ m_estimates_model = \
+ '/media/data/Program/trainer/tools/libpinyin/data/estimates.db'
+
+ def getEstimatesModel(self):
+ return self.m_estimates_model
+
+ m_evals_text = \
+ '/media/data/Program/trainer/tools/libpinyin/data/evals.text'
+
+ def getEvalsText(self):
+ return self.m_evals_text
+
#about 1,200 Chinese characters
m_minimum_chinese_characters = 1200
m_minimum_file_size = m_minimum_chinese_characters * 3 + \