From 713f1f7e3e19babb348e6f2896bbf8eb6e23738e Mon Sep 17 00:00:00 2001 From: Jan Pokorný Date: Wed, 9 Oct 2013 16:53:51 +0200 Subject: Fix issues with patches with low entropy in the context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -> push for offsets being closer to the average delta Note: this is not an overly nice solution; it would be better to grab the output of the patch utility applied to all the hunks for the respective file, but that would limit us in the future (planned) per-failing-hunk handling. We can always return to the point prior to this commit :) Signed-off-by: Jan Pokorný --- fix-offsets | 99 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/fix-offsets b/fix-offsets index d9417b5..da4ea42 100755 --- a/fix-offsets +++ b/fix-offsets @@ -25,11 +25,12 @@ re_h = re.compile( ) use_threading = True +#use_threading = False def adjustment(ret, stderrdata): - #print >>sys.stderr, "ret = {0}\nstderrdata = {1}".format(ret, stderrdata) + print >>sys.stderr, "ret = {0}\nstderrdata = {1}".format(ret, stderrdata) for match in re_h.finditer(stderrdata): - #print >>sys.stderr, match.groups() + print >>sys.stderr, match.groups() ret = int(match.groups()[3]) break else: @@ -37,18 +38,13 @@ def adjustment(ret, stderrdata): return ret -def hunk_worker(hunk, header, tres, cmd, **kwargs): +def hunk_worker(hunk, header, tres, i, cmd, **kwargs): proc = Popen(cmd, **kwargs) str_hunk = str(hunk) partial_patch = header + '\n' + str_hunk _, stderrdata = proc.communicate(partial_patch) delta = adjustment(proc.wait(), stderrdata) - if delta: - hunk.startsrc += delta - hunk.starttgt += delta - str_hunk = str(hunk) - tres[hunk] = str_hunk - + tres[i] = delta def proceed(opts, args): if not args: @@ -74,32 +70,67 @@ def proceed(opts, args): header = '{0}--- {1}\n+++ {2}'.format('\n'.join(p.header), p.source, p.target) print header - tres = dict() - ts = set() + tres = 
[0 for hunk in p.hunks] + ts = [True for hunk in p.hunks] tmax = 10 - for hunk in p.hunks: - if not use_threading: - hunk_worker(hunk, header, tres, cmd, **kwargs) - continue - if len(ts) >= tmax: - while True: - for t in ts: - t.join(0.001) - if not t.is_alive(): - ts.remove(t) - break - else: - continue - break - t = Thread(target=hunk_worker, args=(hunk, header, tres, cmd), - kwargs=kwargs) - t.start() - ts.add(t) - if use_threading: - for t in ts: - t.join() - for hunk in p.hunks: - print tres[hunk], + avg = 0.0 + cnt = 0 + avg_limit = 50.0 + len_ts = len(ts) + while any(ts): + # ts[i] = False if not to continue with that or bool(x) == True + # otherwise (None is a temporary local state!) + for i in xrange(len_ts): + if not ts[i]: + continue + if not use_threading: + hunk_worker(p.hunks[i], header, tres, i, cmd, **kwargs) + ts[i] = None + continue + if i >= tmax: + blockers = filter(lambda (x, y): bool(y), enumerate(ts[:i])) + while len(blockers) >= tmax: + for ii, it in blockers: + it.join(0.001) + if not it.is_alive(): + ts[ii] = None + break + else: + continue + break + t = Thread(target=hunk_worker, args=(p.hunks[i], header, tres, i, cmd), + kwargs=kwargs) + ts[i] = t + t.start() + for i in xrange(len_ts): + t = ts[i] + if t and use_threading: + t.join() + ts[i] = None + if ts[i] is None: + avg = (avg * cnt + tres[i]) + cnt += 1 + avg /= cnt + ts[i] = False + for i in xrange(len_ts): + ts[i] = ts[i] or abs(tres[i] - avg) > avg_limit + if i > 0: + ts[i] = ts[i] or abs(tres[i] - tres[i-1]) > (avg_limit/2) + if i < len_ts - 1: + ts[i] = ts[i] or abs(tres[i] - tres[i+1]) > (avg_limit/2) + if ts[i]: + hunk = p.hunks[i] + delta = int(tres[i] + (tres[i] - avg) / 1.25) + hunk.startsrc += delta + hunk.starttgt += delta + avg_limit *= 1.25 + for i in xrange(len_ts): + delta = tres[i] + hunk = p.hunks[i] + if delta: + hunk.startsrc += delta + hunk.starttgt += delta + print str(hunk) null.close() -- cgit