diff options
author | Peng Wu <alexepico@gmail.com> | 2013-01-16 13:13:15 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2013-01-16 13:13:15 +0800 |
commit | b68763cc73d772593aa396f9b85ebace32e91779 (patch) | |
tree | a5f92221550d0d18a772e366dd59de407b71fc8a /populate.py | |
parent | 938a50962f8f4173ca71171532d2d88e9bcfaffa (diff) | |
download | trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.gz trainer-b68763cc73d772593aa396f9b85ebace32e91779.tar.xz trainer-b68763cc73d772593aa396f9b85ebace32e91779.zip |
switch to walkIndexFast
Diffstat (limited to 'populate.py')
-rw-r--r-- | populate.py | 22 |
1 files changed, 2 insertions, 20 deletions
```diff
diff --git a/populate.py b/populate.py
index 918fc6e..4d416db 100644
--- a/populate.py
+++ b/populate.py
@@ -5,6 +5,7 @@ import sqlite3
 from argparse import ArgumentParser
 import utils
 from myconfig import MyConfig
+from dirwalk import walkIndexFast
 
 
 INSERT_NGRAM_DML = '''
@@ -27,10 +28,6 @@ os.chdir(words_dir)
 #chdir done
 
 
-def handleError(error):
-    sys.exit(error)
-
-
 def handleOneDocument(infile, conn, length):
     print(infile, length)
 
@@ -154,21 +151,6 @@ def handleOneIndex(indexpath, subdir, indexname, fast):
     utils.store_status(indexstatuspath, indexstatus)
 
 
-def walkThroughIndex(path, fast):
-    for root, dirs, files in os.walk(path, topdown=True, onerror=handleError):
-        for onefile in files:
-            filepath = os.path.join(root, onefile)
-            indexpostfix = config.getIndexPostfix()
-            if onefile.endswith(indexpostfix):
-                subdir = os.path.relpath(root, path)
-                indexname = onefile[:-len(indexpostfix)]
-                handleOneIndex(filepath, subdir, indexname, fast)
-            elif onefile.endswith(config.getStatusPostfix()):
-                pass
-            else:
-                print('Unexpected file:' + filepath)
-
-
 if __name__ == '__main__':
     parser = ArgumentParser(description='Populate n-gram.')
     parser.add_argument('--indexdir', action='store', \
@@ -182,5 +164,5 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
     print(args)
-    walkThroughIndex(args.indexdir, args.fast)
+    walkIndexFast(handleOneIndex, args.indexdir, args.fast)
     print('done')
```