diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-16 13:13:15 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-16 13:13:15 +0800 |
commit | b68763cc73d772593aa396f9b85ebace32e91779 (patch) | |
tree | a5f92221550d0d18a772e366dd59de407b71fc8a | |
parent | 938a50962f8f4173ca71171532d2d88e9bcfaffa (diff) | |
download | trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.gz trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.xz trainer-b68763cc73d772593aa396f9b85ebace32e91779.zip |
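switch to walkIndexFast (generate.py and populate.py now import walkIndexFast from dirwalk and drop their duplicated local walkThroughIndex and handleError helpers)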
-rwxr-xr-x | generate.py | 22 | ||||
-rw-r--r-- | populate.py | 22 |
2 files changed, 4 insertions, 40 deletions
diff --git a/generate.py b/generate.py index 58c4d80..069cfec 100755 --- a/generate.py +++ b/generate.py @@ -6,7 +6,7 @@ from subprocess import Popen, PIPE from argparse import ArgumentParser import utils from myconfig import MyConfig - +from dirwalk import walkIndexFast config = MyConfig() @@ -17,10 +17,6 @@ os.chdir(libpinyin_sub_dir) #chdir done -def handleError(error): - sys.exit(error) - - #Note: all file passed here should be trained. def generateOneText(infile, modelfile, reportfile): infilestatuspath = infile + config.getStatusPostfix() @@ -205,20 +201,6 @@ def handleOneIndex(indexpath, subdir, indexname, fast): utils.store_status(indexstatuspath, indexstatus) -def walkThroughIndex(path, fast): - for root, dirs, files in os.walk(path, topdown=True, onerror=handleError): - for onefile in files: - filepath = os.path.join(root, onefile) - indexpostfix = config.getIndexPostfix() - if onefile.endswith(indexpostfix): - subdir = os.path.relpath(root, path) - indexname = onefile[:-len(indexpostfix)] - handleOneIndex(filepath, subdir, indexname, fast) - elif onefile.endswith(config.getStatusPostfix()): - pass - else: - print('Unexpected file:' + filepath) - if __name__ == '__main__': parser = ArgumentParser(description='Generate model candidates.') parser.add_argument('--indexdir', action='store', \ @@ -232,5 +214,5 @@ if __name__ == '__main__': args = parser.parse_args() print(args) - walkThroughIndex(args.indexdir, args.fast) + walkIndexFast(handleOneIndex, args.indexdir, args.fast) print('done') diff --git a/populate.py b/populate.py index 918fc6e..4d416db 100644 --- a/populate.py +++ b/populate.py @@ -5,6 +5,7 @@ import sqlite3 from argparse import ArgumentParser import utils from myconfig import MyConfig +from dirwalk import walkIndexFast INSERT_NGRAM_DML = ''' @@ -27,10 +28,6 @@ os.chdir(words_dir) #chdir done -def handleError(error): - sys.exit(error) - - def handleOneDocument(infile, conn, length): print(infile, length) @@ -154,21 +151,6 @@ def 
handleOneIndex(indexpath, subdir, indexname, fast): utils.store_status(indexstatuspath, indexstatus) -def walkThroughIndex(path, fast): - for root, dirs, files in os.walk(path, topdown=True, onerror=handleError): - for onefile in files: - filepath = os.path.join(root, onefile) - indexpostfix = config.getIndexPostfix() - if onefile.endswith(indexpostfix): - subdir = os.path.relpath(root, path) - indexname = onefile[:-len(indexpostfix)] - handleOneIndex(filepath, subdir, indexname, fast) - elif onefile.endswith(config.getStatusPostfix()): - pass - else: - print('Unexpected file:' + filepath) - - if __name__ == '__main__': parser = ArgumentParser(description='Populate n-gram.') parser.add_argument('--indexdir', action='store', \ @@ -182,5 +164,5 @@ if __name__ == '__main__': args = parser.parse_args() print(args) - walkThroughIndex(args.indexdir, args.fast) + walkIndexFast(handleOneIndex, args.indexdir, args.fast) print('done') |