Thoughts on diff extensions
Matt Mackall
mpm at selenic.com
Mon Jun 20 00:38:02 UTC 2005
On Sun, Jun 19, 2005 at 03:41:44PM -0400, Christopher Li wrote:
> It sounds to me should write a C version of the python difflib.
Already working on it. I started by trimming down SequenceMatcher to
this, which is reasonably small:
class SequenceMatcher:
def __init__(self, isjunk=None, a='', b=''):
self.bl = bl = {}
self.a = a
self.b = b
n = len(b)
populardict = {}
for i, elt in enumerate(b):
if elt in bl:
indices = bl[elt]
if n >= 200 and len(indices) * 100 > n:
populardict[elt] = 1
del indices[:]
else:
indices.append(i)
else:
bl[elt] = [i]
# Purge leftover indices for popular elements.
for elt in populardict:
del bl[elt]
def find_longest_match(self, a1, a2, b1, b2):
a, b, bl = self.a, self.b, self.bl
mi, mj, msize, move = a1, b1, 0, 0
jl = [0] * len(b)
newjl = [0] * len(b)
for i in xrange(a1, a2):
v = bl.get(a[i], ())
for j in v:
if j < b1: continue
if j >= b2: break
k = newjl[j] = jl[j-1] + 1
if k > msize:
mi, mj, msize = i-k+1, j-k+1, k
for j in v:
jl[j] = newjl[j];
while mi-move > a1 and mj-move > b1 and a[mi-move] == b[mj-move]:
move += 1
while mi+msize < a2 and mj+msize < b2 and a[mi+msize] == b[mj+msize]:
msize += 1
return mi - move, mj - move, msize + move
def recurse(self, a1, a2, b1, b2, answer):
i, j, k = x = self.find_longest_match(a1, a2, b1, b2)
if k:
if a1 < i and b1 < j:
self.__helper(a1, i, b1, j, answer)
answer.append(x)
if i+k < a2 and j+k < b2:
self.__helper(i+k, a2, j+k, b2, answer)
--
Mathematics is the supreme nostalgia of our time.
More information about the Mercurial
mailing list