[PATCH 2 of 2 v2] revset: use localrepo persistent branch name caching
Mads Kiilerich
mads at kiilerich.com
Wed Oct 15 18:14:23 UTC 2014
# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1413396807 -7200
# Wed Oct 15 20:13:27 2014 +0200
# Node ID 4cb47516ed19199839174415927e2c53c66cd765
# Parent bf7a0169677c0545a63e64690b0e49e50b376703
revset: use localrepo persistent branch name caching
Branch name filtering in revsets was expensive. For every rev it created a
changectx and called .branch(). (Using changelog.branchinfo() would be up to
25% faster.)
Instead, use the new localrepo caching branch lookup method and improve
performance up to 4-6 times.
To prove it, the following revset was run on the relatively small hg repo:
branch(stable)&branch(default)
Before, it took like 1.2 s every time.
With this change, it is a little bit faster first time (primarily because the
new code path uses changelog.branchinfo instead changectx.branch). When the
cache is fully updated it takes 0.3 s, making it 4 times as fast.
Similar tests on Unity's repo show 6 x improvements.
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -489,16 +489,16 @@ def branch(repo, subset, x):
# note: falls through to the revspec case if no branch with
# this name exists
if pattern in repo.branchmap():
- return subset.filter(lambda r: matcher(repo[r].branch()))
+ return subset.filter(lambda r: matcher(repo.branch(r)))
else:
- return subset.filter(lambda r: matcher(repo[r].branch()))
+ return subset.filter(lambda r: matcher(repo.branch(r)))
s = getset(repo, spanset(repo), x)
b = set()
for r in s:
- b.add(repo[r].branch())
+ b.add(repo.branch(r))
c = s.__contains__
- return subset.filter(lambda r: c(r) or repo[r].branch() in b)
+ return subset.filter(lambda r: c(r) or repo.branch(r) in b)
def bumped(repo, subset, x):
"""``bumped()``
@@ -1431,7 +1431,7 @@ def matching(repo, subset, x):
getfieldfuncs = []
_funcs = {
'user': lambda r: repo[r].user(),
- 'branch': lambda r: repo[r].branch(),
+ 'branch': repo.branch,
'date': lambda r: repo[r].date(),
'description': lambda r: repo[r].description(),
'files': lambda r: repo[r].files(),
@@ -1532,9 +1532,9 @@ def sort(repo, subset, x):
elif k == '-rev':
e.append(-r)
elif k == 'branch':
- e.append(c.branch())
+ e.append(repo.branch(r))
elif k == '-branch':
- e.append(invert(c.branch()))
+ e.append(invert(repo.branch(r)))
elif k == 'desc':
e.append(c.description())
elif k == '-desc':
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -744,6 +744,64 @@ issue2437
$ log 'ancestors(8) and (heads(branch("-a-b-c-")) or heads(branch(é)))'
4
+branchname caching
+
+ cache creation
+ $ rm .hg/cache/branchnames
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames
+ handling of invalid cache file
+ $ echo > .hg/cache/branchnames
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames
+ handling of extra trailing data
+ $ echo >> .hg/cache/branchnames
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames
+ update after commit
+ $ hg tag tag
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames
+ $ update after rollback
+ $ hg up -qr '.^'
+ $ hg rollback -qf
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames
+ handle non-tip node change (could be a problem with some cache invalidation schemes)
+ $ hg bundle -q --all bu.hg
+ $ hg --config extensions.strip= strip --no-b -qr 8
+ $ hg up -q 7
+ $ hg branch -q hacked
+ $ hg ci --amend -qm 'hacked'
+ $ hg pull -q bu.hg -r 24286f4ae135
+ $ log 'branch("hacked")'
+ 7
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ 7e7547be03cd3badb5053899aea5c532 .hg/cache/branchnames
+ $ hg --config extensions.strip= strip --no-b -qr 7:
+ $ hg pull -q bu.hg
+ $ rm bu.hg
+ $ hg up -qr tip
+ $ log 'branch("-a-b-c-")'
+ 4
+ $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+ 753e486275e544c562da095fd116a441 .hg/cache/branchnames
+(the cache file still contains the now unused 'hacked' branch name)
+
issue2654: report a parse error if the revset was not completely parsed
$ log '1 OR 2'
More information about the Mercurial-devel
mailing list