summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-01-15 11:33:08 +0800
committer Peng Wu <alexepico@gmail.com> 2013-01-15 11:49:22 +0800
commit 8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f (patch)
tree 3847c503c47d08ebca2b968a8d710e34488f8eb3
parent 329870de63777c801a4f071fe472a9e9657b7a60 (diff)
download trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.tar.gz
trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.tar.xz
trainer-8f5a4dd7aa1e32cd81effe49a85bdf6b9f1fb23f.zip
re-factor code
-rw-r--r-- lib/myconfig.py 4
-rw-r--r-- partialwordthreshold.py 13
-rw-r--r-- populate.py 1
3 files changed, 8 insertions, 10 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index 35244a7..a649c82 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -131,10 +131,10 @@ class MyConfig:
return 3 # minimum word occurrence
def getPartialWordThreshold(self):
- return 0.30 # the last 10% in position
+ return 0.10 # the first 10% in position
def getNewWordThreshold(self):
- return 0.30 / 2 # the last 5% in position
+ return 0.10 / 2 # the first 5% in position
def getMaximumIteration(self):
return 20 # roughly around N
diff --git a/partialwordthreshold.py b/partialwordthreshold.py
index f848553..c215f40 100644
--- a/partialwordthreshold.py
+++ b/partialwordthreshold.py
@@ -15,7 +15,6 @@ config = MyConfig()
#change cwd to the word recognizer directory
words_dir = config.getWordRecognizerDir()
-os.makedirs(words_dir, exist_ok=True)
os.chdir(words_dir)
#chdir done
@@ -62,12 +61,12 @@ def computeThreshold(conn):
#ascending sort
wordswithfreq.sort(key=itemgetter(1))
pos = int(len(wordswithfreq) * config.getPartialWordThreshold())
- threshold = wordswithfreq[pos]
+ threshold = wordswithfreq[-pos]
return threshold
-def handleOneIndex(filepath, subdir, indexname):
+def handleOneIndex(indexpath, subdir, indexname):
print(indexpath, subdir, indexname)
indexstatuspath = indexpath + config.getStatusPostfix()
@@ -88,8 +87,8 @@ def handleOneIndex(filepath, subdir, indexname):
conn = sqlite3.connect(filepath)
- threshold = computeThreshold(conn)
- print(threshold)
+ (word, threshold)= computeThreshold(conn)
+ print(word, threshold)
indexstatus['PartialWordThreshold'] = threshold
conn.commit()
@@ -97,8 +96,8 @@ def handleOneIndex(filepath, subdir, indexname):
conn.close()
#sign epoch
- #utils.sign_epoch(indexstatus, 'PartialWordThreshold')
- #utils.store_status(indexstatuspath, indexstatus)
+ utils.sign_epoch(indexstatus, 'PartialWordThreshold')
+ utils.store_status(indexstatuspath, indexstatus)
def walkThroughIndex(path):
diff --git a/populate.py b/populate.py
index 0e8a964..3097d0e 100644
--- a/populate.py
+++ b/populate.py
@@ -34,7 +34,6 @@ N = config.getMaximumCombineNumber()
#change cwd to the word recognizer directory
words_dir = config.getWordRecognizerDir()
-os.makedirs(words_dir, exist_ok=True)
os.chdir(words_dir)
#chdir done