diff options
| author | Peng Wu <alexepico@gmail.com> | 2013-01-18 15:36:30 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2013-01-18 15:36:30 +0800 |
| commit | 8a09db5ce595569550475c03aeb9f025b942d128 (patch) | |
| tree | c9f01d22634861da229795c5c96a2a0648bafa5d | |
| parent | 32dbf024db28d6f550c1422fd0208403c998b3a6 (diff) | |
| download | trainer-8a09db5ce595569550475c03aeb9f025b942d128.tar.gz trainer-8a09db5ce595569550475c03aeb9f025b942d128.tar.xz trainer-8a09db5ce595569550475c03aeb9f025b942d128.zip | |
write load_words function
| -rw-r--r-- | partialword.py | 37 |
1 files changed, 36 insertions, 1 deletions
```diff
diff --git a/partialword.py b/partialword.py
index 54ea0e0..32d3166 100644
--- a/partialword.py
+++ b/partialword.py
@@ -1,7 +1,9 @@
 #!/usr/bin/python3
 import os
-import os.path
 import sqlite3
+from argparse import ArgumentParser
+import utils
+from myconfig import MyConfig


 SELECT_PARTIAL_WORD_DML = '''
@@ -40,3 +42,36 @@ DROP TABLE IF EXISTS ngram_fts;
 SELECT_MERGE_HIGH_NGRAM_DML = '''
 SELECT words, freq FROM ngram_fts WHERE words MATCH ?;
 '''
+
+config = MyConfig()
+
+#maximum combine number
+N = config.getMaximumCombineNumber()
+
+#change cwd to the word recognizer directory
+words_dir = config.getWordRecognizerDir()
+os.chdir(words_dir)
+#chdir done
+
+
+#load existing words
+words_set = set([])
+
+def load_words(filename):
+    wordlistfile = open(filename, "r")
+
+    for oneline in wordlistfile.readlines():
+        oneline = oneline.rstrip(os.linesep)
+
+        if len(oneline) == 0:
+            continue
+
+        word = oneline
+
+        words_set.add(word)
+
+    wordlistfile.close()
+
+
+load_words(config.getWordsListFileName())
+#print(words_set)
```
