summaryrefslogtreecommitdiffstats
path: root/populate.py
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2013-01-14 15:42:52 +0800
committerPeng Wu <alexepico@gmail.com>2013-01-14 15:47:33 +0800
commit7a51612320739af8c05f9e9f113302d47da66bda (patch)
tree7275796d39c45fc410ccc70ab1b849fbaf75ce9d /populate.py
parentf31533a3ca0f6b5476b9fd3638086077d419f95b (diff)
downloadtrainer-7a51612320739af8c05f9e9f113302d47da66bda.tar.gz
trainer-7a51612320739af8c05f9e9f113302d47da66bda.tar.xz
trainer-7a51612320739af8c05f9e9f113302d47da66bda.zip
improves populate.py
Diffstat (limited to 'populate.py')
-rw-r--r--populate.py27
1 files changed, 22 insertions, 5 deletions
diff --git a/populate.py b/populate.py
index d72d685..0e8a964 100644
--- a/populate.py
+++ b/populate.py
@@ -163,7 +163,7 @@ def handleBigramPass(indexpath, workdir):
conn.close()
-def handleOneIndex(indexpath, subdir, indexname):
+def handleOneIndex(indexpath, subdir, indexname, fast):
print(indexpath, subdir, indexname)
indexstatuspath = indexpath + config.getStatusPostfix()
@@ -177,8 +177,20 @@ def handleOneIndex(indexpath, subdir, indexname):
subdir + os.sep + indexname
print(workdir)
+ shmdir = config.getInMemoryFileSystem()
+
for i in range(1, N + 1):
- handleOnePass(indexpath, workdir, i)
+ if fast:
+ #copy file
+ filename = config.getNgramFileName(i)
+ filepath = workdir + os.sep + filename
+ shmfilepath = shmdir + os.sep + filename
+ utils.copyfile(filepath, shmfilepath)
+ handleOnePass(indexpath, shmdir, i)
+ utils.copyfile(shmfilepath, filepath)
+ os.unlink(shmfilepath)
+ else:
+ handleOnePass(indexpath, workdir, i)
handleBigramPass(indexpath, workdir)
@@ -188,7 +200,7 @@ def handleOneIndex(indexpath, subdir, indexname):
-def walkThroughIndex(path):
+def walkThroughIndex(path, fast):
for root, dirs, files in os.walk(path, topdown=True, onerror=handleError):
for onefile in files:
filepath = os.path.join(root, onefile)
@@ -196,7 +208,7 @@ def walkThroughIndex(path):
if onefile.endswith(indexpostfix):
subdir = os.path.relpath(root, path)
indexname = onefile[:-len(indexpostfix)]
- handleOneIndex(filepath, subdir, indexname)
+ handleOneIndex(filepath, subdir, indexname, fast)
elif onefile.endswith(config.getStatusPostfix()):
pass
else:
@@ -209,7 +221,12 @@ if __name__ == '__main__':
help='index directory', \
default=os.path.join(config.getTextDir(), 'index'))
+ parser.add_argument('--fast', action='store_const', \
+ help='Use /dev/shm to speed up populate', \
+ const=True, default=False)
+
+
args = parser.parse_args()
print(args)
- walkThroughIndex(args.indexdir)
+ walkThroughIndex(args.indexdir, args.fast)
print('done')