diff options
| author | Peng Wu <alexepico@gmail.com> | 2011-07-21 18:20:08 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2011-07-21 18:20:08 +0800 |
| commit | abead147cd1eea3ff7ba8b34055374a8ad812464 (patch) | |
| tree | eb3d5c457a5aaf89b2437baa9ee9cd3b01d55145 | |
| parent | eea694d78b09818af1d3bba432bfa97262db8c2a (diff) | |
| download | trainer-abead147cd1eea3ff7ba8b34055374a8ad812464.tar.gz trainer-abead147cd1eea3ff7ba8b34055374a8ad812464.tar.xz trainer-abead147cd1eea3ff7ba8b34055374a8ad812464.zip | |
begin to write generate tool
| -rw-r--r-- | generate.py | 29 | ||||
| -rw-r--r-- | lib/myconfig.py | 11 |
2 files changed, 40 insertions, 0 deletions
```diff
diff --git a/generate.py b/generate.py
new file mode 100644
index 0000000..8cb0be8
--- /dev/null
+++ b/generate.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python3
+from myconfig import MyConfig
+
+
+config = MyConfig()
+
+#change cwd to the libpinyin utils/training directory
+libpinyin_dir = config.getToolsDir()
+libpinyin_sub_dir = os.path.join(libpinyin_dir, 'utils', 'training')
+os.chdir(libpinyin_sub_dir)
+#chdir done
+
+def handleError(error):
+    sys.exit(error)
+
+
+#Note: all file passed here should be trained.
+def generateOneText(infile, modelfile):
+    pass
+
+#Note: should check the corpus file size, and skip the too small text file.
+def handleOneIndex(indexpath):
+    pass
+
+def walkThroughIndex(path):
+    pass
+
+if __name__ == '__main__':
+    pass
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 4ed631f..bea11a3 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -46,6 +46,17 @@ class MyConfig:
     def getMinimumFileSize(self):
         return self.m_minimum_file_size
 
+    m_candidate_model_size = 11.9 * 1024 * 1024
+
+    #the trained corpus size of model candidates
+    def getCandidateModelSize(self):
+        return self.m_candidate_model_size
+
+    m_candidate_model_name = "model-candidates-{0}.db"
+
+    def getCandidateModelName(self, index):
+        return self.m_candidate_model_name.format(index)
+
     m_segment_postfix = '.segmented'
 
     def getSegmentPostfix(self):
```
