[PATCH] similar: remove caching from the module level
Sean Farley
sean at farley.io
Fri Jan 13 19:43:59 UTC 2017
# HG changeset patch
# User Sean Farley <sean at farley.io>
# Date 1484336556 28800
# Fri Jan 13 11:42:36 2017 -0800
# Node ID 7c54c9524a0954a3bd2f2fb4451028869f851f6d
# Parent 8540967cd9e0909a9a73dbff458c4cedd4db26aa
similar: remove caching from the module level
To prevent Bad Things™ from happening, let's rework the logic to not use
util.cachefunc.
diff --git a/mercurial/similar.py b/mercurial/similar.py
--- a/mercurial/similar.py
+++ b/mercurial/similar.py
@@ -11,11 +11,10 @@ import hashlib
from .i18n import _
from . import (
bdiff,
mdiff,
- util,
)
def _findexactmatches(repo, added, removed):
'''find renamed files that have no changes
@@ -41,20 +40,18 @@ def _findexactmatches(repo, added, remov
yield (hashes[h], fctx)
# Done
repo.ui.progress(_('searching for exact renames'), None)
-@util.cachefunc
def _ctxdata(fctx):
# lazily load text
orig = fctx.data()
return orig, mdiff.splitnewlines(orig)
-@util.cachefunc
-def score(fctx1, fctx2):
- text = fctx1.data()
- orig, lines = _ctxdata(fctx2)
+def _score(fctx, otherdata):
+ orig, lines = otherdata
+ text = fctx.data()
# bdiff.blocks() returns blocks of matching lines
# count the number of bytes in each
equal = 0
matches = bdiff.blocks(text, orig)
for x1, x2, y1, y2 in matches:
@@ -62,10 +59,13 @@ def score(fctx1, fctx2):
equal += len(line)
lengths = len(text) + len(orig)
return equal * 2.0 / lengths
+def score(fctx1, fctx2):
+ return _score(fctx1, _ctxdata(fctx2))
+
def _findsimilarmatches(repo, added, removed, threshold):
'''find potentially renamed files based on similar file content
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after, score) tuples of partial matches.
@@ -73,13 +73,16 @@ def _findsimilarmatches(repo, added, rem
copies = {}
for i, r in enumerate(removed):
repo.ui.progress(_('searching for similar files'), i,
total=len(removed), unit=_('files'))
+ data = None
for a in added:
bestscore = copies.get(a, (None, threshold))[1]
- myscore = score(a, r)
+ if data is None:
+ data = _ctxdata(r)
+ myscore = score(a, data)
if myscore >= bestscore:
copies[a] = (r, myscore)
repo.ui.progress(_('searching'), None)
for dest, v in copies.iteritems():
More information about the Mercurial-devel
mailing list