diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-15 11:33:08 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-15 11:49:22 +0800 |
commit | 8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f (patch) | |
tree | 3847c503c47d08ebca2b968a8d710e34488f8eb3 | |
parent | 329870de63777c801a4f071fe472a9e9657b7a60 (diff) | |
download | trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.tar.gz trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.tar.xz trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.zip |
re-factor code
-rw-r--r-- | lib/myconfig.py | 4 |
-rw-r--r-- | partialwordthreshold.py | 13 |
-rw-r--r-- | populate.py | 1 |
3 files changed, 8 insertions, 10 deletions
[Diff re-flowed from the flattened page; indentation and blank context lines are inferred from the hunk headers.]

```diff
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 35244a7..a649c82 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -131,10 +131,10 @@ class MyConfig:
         return 3 # minimum word occurrence
 
     def getPartialWordThreshold(self):
-        return 0.30 # the last 10% in position
+        return 0.10 # the first 10% in position
 
     def getNewWordThreshold(self):
-        return 0.30 / 2 # the last 5% in position
+        return 0.10 / 2 # the first 5% in position
 
     def getMaximumIteration(self):
         return 20 # roughly around N
diff --git a/partialwordthreshold.py b/partialwordthreshold.py
index f848553..c215f40 100644
--- a/partialwordthreshold.py
+++ b/partialwordthreshold.py
@@ -15,7 +15,6 @@ config = MyConfig()
 
 #change cwd to the word recognizer directory
 words_dir = config.getWordRecognizerDir()
-os.makedirs(words_dir, exist_ok=True)
 os.chdir(words_dir)
 #chdir done
 
@@ -62,12 +61,12 @@ def computeThreshold(conn):
     #ascending sort
     wordswithfreq.sort(key=itemgetter(1))
     pos = int(len(wordswithfreq) * config.getPartialWordThreshold())
-    threshold = wordswithfreq[pos]
+    threshold = wordswithfreq[-pos]
     return threshold
 
 
 
-def handleOneIndex(filepath, subdir, indexname):
+def handleOneIndex(indexpath, subdir, indexname):
     print(indexpath, subdir, indexname)
 
     indexstatuspath = indexpath + config.getStatusPostfix()
@@ -88,8 +87,8 @@ def handleOneIndex(filepath, subdir, indexname):
 
     conn = sqlite3.connect(filepath)
 
-    threshold = computeThreshold(conn)
-    print(threshold)
+    (word, threshold)= computeThreshold(conn)
+    print(word, threshold)
     indexstatus['PartialWordThreshold'] = threshold
 
     conn.commit()
@@ -97,8 +96,8 @@ def handleOneIndex(filepath, subdir, indexname):
         conn.close()
 
     #sign epoch
-    #utils.sign_epoch(indexstatus, 'PartialWordThreshold')
-    #utils.store_status(indexstatuspath, indexstatus)
+    utils.sign_epoch(indexstatus, 'PartialWordThreshold')
+    utils.store_status(indexstatuspath, indexstatus)
 
 
 def walkThroughIndex(path):
diff --git a/populate.py b/populate.py
index 0e8a964..3097d0e 100644
--- a/populate.py
+++ b/populate.py
@@ -34,7 +34,6 @@ N = config.getMaximumCombineNumber()
 
 #change cwd to the word recognizer directory
 words_dir = config.getWordRecognizerDir()
-os.makedirs(words_dir, exist_ok=True)
 os.chdir(words_dir)
 #chdir done
 
```