[PATCH 7 of 7 V2] revset: introduce an API that avoids `formatspec` input serialization
Boris Feld
boris.feld at octobus.net
Mon Jan 14 12:13:17 UTC 2019
# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1546605681 -3600
# Fri Jan 04 13:41:21 2019 +0100
# Node ID 8bcaad324401bd275886a0ae4340a876ac82ab84
# Parent 1def212ed730ce024963fd41f2d341f584521bbf
# EXP-Topic revs-efficiency
# Available At https://bitbucket.org/octobus/mercurial-devel/
# hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8bcaad324401
revset: introduce an API that avoids `formatspec` input serialization
Instead of having the data fully serialized, the input can be replaced with a
`__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>`
has to be passed along with the format spec, but the operation can get much more
efficient.
Just using it for the simple "%ld" case provides a significant boost. For example,
here is the impact on a sample discovery run between two pypy repositories
with arbitrary differences (using hg perfdiscovery).
$ hg perfdiscovery
before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15)
after: ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1362,9 +1362,8 @@ class localrepository(object):
Returns a revset.abstractsmartset, which is a list-like interface
that contains integer revisions.
'''
- expr = revsetlang.formatspec(expr, *args)
- m = revset.match(None, expr)
- return m(self)
+ tree = revsetlang.spectree(expr, *args)
+ return revset.makematcher(tree)(self)
def set(self, expr, *args):
'''Find revisions matching a revset and emit changectx instances.
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -125,6 +125,13 @@ def stringset(repo, subset, x, order):
return baseset([x])
return baseset()
+def rawsmartset(repo, subset, x, order):
+ """argument is already a smartset, use that directly"""
+ if order == followorder:
+ return subset & x
+ else:
+ return x & subset
+
def rangeset(repo, subset, x, y, order):
m = getset(repo, fullreposet(repo), x)
n = getset(repo, fullreposet(repo), y)
@@ -2216,6 +2223,7 @@ methods = {
"ancestor": ancestorspec,
"parent": parentspec,
"parentpost": parentpost,
+ "smartset": rawsmartset,
}
subscriptrelations = {
diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
--- a/mercurial/revsetlang.py
+++ b/mercurial/revsetlang.py
@@ -333,7 +333,7 @@ def _analyze(x):
elif op == 'negate':
s = getstring(x[1], _("can't negate that"))
return _analyze(('string', '-' + s))
- elif op in ('string', 'symbol'):
+ elif op in ('string', 'symbol', 'smartset'):
return x
elif op == 'rangeall':
return (op, None)
@@ -373,7 +373,7 @@ def _optimize(x):
return 0, x
op = x[0]
- if op in ('string', 'symbol'):
+ if op in ('string', 'symbol', 'smartset'):
return 0.5, x # single revisions are small
elif op == 'and':
wa, ta = _optimize(x[1])
@@ -691,6 +691,29 @@ def formatspec(expr, *args):
raise error.ProgrammingError("unknown revspec item type: %r" % t)
return b''.join(ret)
+def spectree(expr, *args):
+ """similar to formatspec but return a parsed and optimized tree"""
+ parsed = _parseargs(expr, args)
+ ret = []
+ inputs = []
+ for t, arg in parsed:
+ if t is None:
+ ret.append(arg)
+ elif t == 'baseset':
+ newtree = ('smartset', smartset.baseset(arg))
+ inputs.append(newtree)
+ ret.append("$")
+ else:
+ raise error.ProgrammingError("unknown revspec item type: %r" % t)
+ expr = b''.join(ret)
+ tree = _parsewith(expr, syminitletters=_aliassyminitletters)
+ tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
+ tree = foldconcat(tree)
+ tree = analyze(tree)
+ tree = optimize(tree)
+ return tree
+
+
def _parseargs(expr, args):
"""parse the expression and replace all inexpensive args
More information about the Mercurial-devel
mailing list