Thoughts on diff extensions

Matt Mackall mpm at selenic.com
Mon Jun 20 00:38:02 UTC 2005


On Sun, Jun 19, 2005 at 03:41:44PM -0400, Christopher Li wrote:
> It sounds to me should write a C version of the python difflib.

Already working on it. I started by trimming down SequenceMatcher to
this, which is reasonably small:

class SequenceMatcher:
    def __init__(self, isjunk=None, a='', b=''):
        self.bl = bl = {}
        self.a = a
        self.b = b

        n = len(b)
        populardict = {}
        for i, elt in enumerate(b):
            if elt in bl:
                indices = bl[elt]
                if n >= 200 and len(indices) * 100 > n:
                    populardict[elt] = 1
                    del indices[:]
                else:
                    indices.append(i)
            else:
                bl[elt] = [i]

        # Purge leftover indices for popular elements.
        for elt in populardict:
            del bl[elt]

    def find_longest_match(self, a1, a2, b1, b2):
        a, b, bl = self.a, self.b, self.bl
        mi, mj, msize, move = a1, b1, 0, 0
        jl = [0] * len(b)
        newjl = [0] * len(b)

        for i in xrange(a1, a2):
            v = bl.get(a[i], ())
            for j in v:
                if j < b1: continue
                if j >= b2: break
                k = newjl[j] = jl[j-1] + 1
                if k > msize:
                    mi, mj, msize = i-k+1, j-k+1, k
            for j in v:
                jl[j] = newjl[j];

        while mi-move > a1 and mj-move > b1 and a[mi-move] == b[mj-move]:
            move += 1
        while mi+msize < a2 and mj+msize < b2 and a[mi+msize] == b[mj+msize]:
            msize += 1
        
        return mi - move, mj - move, msize + move

    def recurse(self, a1, a2, b1, b2, answer):
        i, j, k = x = self.find_longest_match(a1, a2, b1, b2)

        if k:
            if a1 < i and b1 < j:
                self.__helper(a1, i, b1, j, answer)
            answer.append(x)
            if i+k < a2 and j+k < b2:
                self.__helper(i+k, a2, j+k, b2, answer)

-- 
Mathematics is the supreme nostalgia of our time.



More information about the Mercurial mailing list