summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-16 13:13:15 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-16 13:13:15 +0800
commit: b68763cc73d772593aa396f9b85ebace32e91779 (patch)
tree: a5f92221550d0d18a772e366dd59de407b71fc8a
parent: 938a50962f8f4173ca71171532d2d88e9bcfaffa (diff)
download: trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.gz
download: trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.xz
download: trainer-b68763cc73d772593aa396f9b85ebace32e91779.zip
switch to walkIndexFast
-rwxr-xr-x generate.py 22
-rw-r--r-- populate.py 22
2 files changed, 4 insertions, 40 deletions
diff --git a/generate.py b/generate.py
index 58c4d80..069cfec 100755
--- a/generate.py
+++ b/generate.py
@@ -6,7 +6,7 @@ from subprocess import Popen, PIPE
from argparse import ArgumentParser
import utils
from myconfig import MyConfig
-
+from dirwalk import walkIndexFast
config = MyConfig()
@@ -17,10 +17,6 @@ os.chdir(libpinyin_sub_dir)
#chdir done
-def handleError(error):
- sys.exit(error)
-
-
#Note: all file passed here should be trained.
def generateOneText(infile, modelfile, reportfile):
infilestatuspath = infile + config.getStatusPostfix()
@@ -205,20 +201,6 @@ def handleOneIndex(indexpath, subdir, indexname, fast):
utils.store_status(indexstatuspath, indexstatus)
-def walkThroughIndex(path, fast):
- for root, dirs, files in os.walk(path, topdown=True, onerror=handleError):
- for onefile in files:
- filepath = os.path.join(root, onefile)
- indexpostfix = config.getIndexPostfix()
- if onefile.endswith(indexpostfix):
- subdir = os.path.relpath(root, path)
- indexname = onefile[:-len(indexpostfix)]
- handleOneIndex(filepath, subdir, indexname, fast)
- elif onefile.endswith(config.getStatusPostfix()):
- pass
- else:
- print('Unexpected file:' + filepath)
-
if __name__ == '__main__':
parser = ArgumentParser(description='Generate model candidates.')
parser.add_argument('--indexdir', action='store', \
@@ -232,5 +214,5 @@ if __name__ == '__main__':
args = parser.parse_args()
print(args)
- walkThroughIndex(args.indexdir, args.fast)
+ walkIndexFast(handleOneIndex, args.indexdir, args.fast)
print('done')
diff --git a/populate.py b/populate.py
index 918fc6e..4d416db 100644
--- a/populate.py
+++ b/populate.py
@@ -5,6 +5,7 @@ import sqlite3
from argparse import ArgumentParser
import utils
from myconfig import MyConfig
+from dirwalk import walkIndexFast
INSERT_NGRAM_DML = '''
@@ -27,10 +28,6 @@ os.chdir(words_dir)
#chdir done
-def handleError(error):
- sys.exit(error)
-
-
def handleOneDocument(infile, conn, length):
print(infile, length)
@@ -154,21 +151,6 @@ def handleOneIndex(indexpath, subdir, indexname, fast):
utils.store_status(indexstatuspath, indexstatus)
-def walkThroughIndex(path, fast):
- for root, dirs, files in os.walk(path, topdown=True, onerror=handleError):
- for onefile in files:
- filepath = os.path.join(root, onefile)
- indexpostfix = config.getIndexPostfix()
- if onefile.endswith(indexpostfix):
- subdir = os.path.relpath(root, path)
- indexname = onefile[:-len(indexpostfix)]
- handleOneIndex(filepath, subdir, indexname, fast)
- elif onefile.endswith(config.getStatusPostfix()):
- pass
- else:
- print('Unexpected file:' + filepath)
-
-
if __name__ == '__main__':
parser = ArgumentParser(description='Populate n-gram.')
parser.add_argument('--indexdir', action='store', \
@@ -182,5 +164,5 @@ if __name__ == '__main__':
args = parser.parse_args()
print(args)
- walkThroughIndex(args.indexdir, args.fast)
+ walkIndexFast(handleOneIndex, args.indexdir, args.fast)
print('done')