diff options
| author | Peng Wu <alexepico@gmail.com> | 2012-09-17 14:55:21 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2012-09-17 14:55:21 +0800 |
| commit | 1717258b152f1f4cea71471f0efc607723549faa (patch) | |
| tree | 1819e3a891c380997f534868fe86fcb6de7cbadc | |
| parent | d2e22a005c4fe9c1ea135770dbf0786016a8d4f5 (diff) | |
| download | trainer-1717258b152f1f4cea71471f0efc607723549faa.tar.gz trainer-1717258b152f1f4cea71471f0efc607723549faa.tar.xz trainer-1717258b152f1f4cea71471f0efc607723549faa.zip | |
add fast option to segment.py
| -rwxr-xr-x | segment.py | 22 |
1 file changed, 15 insertions, 7 deletions
@@ -21,14 +21,18 @@ def handleError(error): sys.exit(error) -def segmentOneText(infile, outfile, reportfile): +def segmentOneText(infile, outfile, reportfile, fast): infilestatuspath = infile + config.getStatusPostfix() infilestatus = utils.load_status(infilestatuspath) if utils.check_epoch(infilestatus, 'Segment'): return #begin processing - cmdline = '../utils/segment/ngseg >"' + outfile + '"' + if fast: + cmdline = '../utils/segment/spseg >"' + outfile + '"' + else: + cmdline = '../utils/segment/ngseg >"' + outfile + '"' + subprocess = Popen(cmdline, shell=True, stdin=PIPE, stderr=PIPE, \ close_fds=True) @@ -51,7 +55,7 @@ def segmentOneText(infile, outfile, reportfile): utils.store_status(infilestatuspath, infilestatus) -def handleOneIndex(indexpath): +def handleOneIndex(indexpath, fast): indexstatuspath = indexpath + config.getStatusPostfix() indexstatus = utils.load_status(indexstatuspath) if utils.check_epoch(indexstatus, 'Segment'): @@ -68,7 +72,7 @@ def handleOneIndex(indexpath): reportfile = config.getTextDir() + textpath + \ config.getSegmentReportPostfix() print("Processing " + title + '#' + textpath) - segmentOneText(infile, outfile, reportfile) + segmentOneText(infile, outfile, reportfile, fast) print("Processed " + title + '#' + textpath) indexfile.close() #end processing @@ -77,12 +81,12 @@ def handleOneIndex(indexpath): utils.store_status(indexstatuspath, indexstatus) -def walkThroughIndex(path): +def walkThroughIndex(path, fast): for root, dirs, files in os.walk(path, topdown=True, onerror=handleError): for onefile in files: filepath = os.path.join(root, onefile) if onefile.endswith(config.getIndexPostfix()): - handleOneIndex(filepath) + handleOneIndex(filepath, fast) elif onefile.endswith(config.getStatusPostfix()): pass else: @@ -95,7 +99,11 @@ if __name__ == '__main__': help='index directory', \ default=os.path.join(config.getTextDir(), 'index')) + parser.add_argument('--fast', action='store_const', \ + help='Use spseg to speed up segment', \ + 
const=True, default=False) + args = parser.parse_args() print(args) - walkThroughIndex(args.indexdir) + walkThroughIndex(args.indexdir, args.fast) print('done') |
