[Request] [+-- ] D8654: phases: make phase list dense or dictionaries [PoC]
joerg.sonnenberger (Joerg Sonnenberger)
phabricator at mercurial-scm.org
Wed Jun 24 00:27:41 UTC 2020
joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
When the internal and archived phases were added, allphases became sparsely
populated. This dramatically increased the number of lookup operations
for public revisions when they hit phasecache.phase, as it is
iterating over all phases in the phasesets. Fix this by making the
various data structures either dense lists or dictionaries, depending
on the purpose. This improves a no-change "hg up" for my NetBSD test
repository from 4s to 1.3s.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D8654
AFFECTED FILES
mercurial/bundle2.py
mercurial/exchange.py
mercurial/exchangev2.py
mercurial/phases.py
mercurial/repoview.py
CHANGE DETAILS
diff --git a/mercurial/repoview.py b/mercurial/repoview.py
--- a/mercurial/repoview.py
+++ b/mercurial/repoview.py
@@ -129,7 +129,7 @@
def computemutable(repo, visibilityexceptions=None):
assert not repo.changelog.filteredrevs
# fast check to avoid revset call on huge repo
- if any(repo._phasecache.phaseroots[1:]):
+ if any(pycompat.itervalues(repo._phasecache.trackedphaseroots())):
getphase = repo._phasecache.phase
maymutable = filterrevs(repo, b'base')
return frozenset(r for r in maymutable if getphase(repo, r))
@@ -154,7 +154,7 @@
assert not repo.changelog.filteredrevs
cl = repo.changelog
firstmutable = len(cl)
- for roots in repo._phasecache.phaseroots[1:]:
+ for roots in pycompat.itervalues(repo._phasecache.trackedphaseroots()):
if roots:
firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
# protect from nullrev root
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -128,25 +128,25 @@
_fphasesentry = struct.Struct(b'>i20s')
-INTERNAL_FLAG = 64 # Phases for mercurial internal usage only
-HIDEABLE_FLAG = 32 # Phases that are hideable
-
# record phase index
public, draft, secret = range(3)
-internal = INTERNAL_FLAG | HIDEABLE_FLAG
-archived = HIDEABLE_FLAG
-allphases = list(range(internal + 1))
-trackedphases = allphases[1:]
+internal = 96
+archived = 32
+allphases = [public, draft, secret, archived, internal]
+trackedphases = allphases[draft:]
# record phase names
cmdphasenames = [b'public', b'draft', b'secret'] # known to `hg phase` command
-phasenames = [None] * len(allphases)
-phasenames[: len(cmdphasenames)] = cmdphasenames
+phasenames = dict(enumerate(cmdphasenames))
phasenames[archived] = b'archived'
phasenames[internal] = b'internal'
+phasenumber = {name: phase for phase, name in phasenames.items()}
+phasenumber2 = phasenumber.copy()
+phasenumber2.update({phase: phase for phase in phasenames})
+phasenumber2.update({b'%i' % phase: phase for phase in phasenames})
# record phase property
mutablephases = tuple(allphases[1:])
remotehiddenphases = tuple(allphases[2:])
-localhiddenphases = tuple(p for p in allphases if p & HIDEABLE_FLAG)
+localhiddenphases = (internal, archived)
def supportinternal(repo):
@@ -167,7 +167,7 @@
"""
repo = repo.unfiltered()
dirty = False
- roots = [set() for i in allphases]
+ roots = {i: set() for i in allphases}
try:
f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
try:
@@ -193,7 +193,7 @@
[[PUBLIC_HEADS], [DRAFTS_HEADS], [SECRET_HEADS]]
"""
binarydata = []
- for phase, nodes in enumerate(phasemapping):
+ for phase, nodes in pycompat.iteritems(phasemapping):
for head in nodes:
binarydata.append(_fphasesentry.pack(phase, head))
return b''.join(binarydata)
@@ -203,7 +203,7 @@
"""decode a binary stream into a 'phase -> nodes' mapping
Since phases are integer the mapping is actually a python list."""
- headsbyphase = [[] for i in allphases]
+ headsbyphase = {i: [] for i in allphases}
entrysize = _fphasesentry.size
while True:
entry = stream.read(entrysize)
@@ -323,6 +323,13 @@
self.filterunknown(repo)
self.opener = repo.svfs
+ def trackedphaseroots(self):
+ return {
+ phase: revs
+ for phase, revs in pycompat.iteritems(self.phaseroots)
+ if phase != public
+ }
+
def getrevset(self, repo, phases, subset=None):
"""return a smartset for the given phases"""
self.loadphaserevs(repo) # ensure phase's sets are loaded
@@ -380,7 +387,7 @@
# Shallow copy meant to ensure isolation in
# advance/retractboundary(), nothing more.
ph = self.__class__(None, None, _load=False)
- ph.phaseroots = self.phaseroots[:]
+ ph.phaseroots = self.phaseroots.copy()
ph.dirty = self.dirty
ph.opener = self.opener
ph._loadedrevslen = self._loadedrevslen
@@ -400,17 +407,19 @@
def _getphaserevsnative(self, repo):
repo = repo.unfiltered()
- nativeroots = []
+ nativeroots = [[] for phase in range(internal)]
for phase in trackedphases:
- nativeroots.append(
- pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
+ nativeroots[phase - 1] = pycompat.maplist(
+ repo.changelog.rev, self.phaseroots[phase]
)
- return repo.changelog.computephases(nativeroots)
+ revslen, phaseset = repo.changelog.computephases(nativeroots)
+ phaseset = {phase: phaseset[phase] or set() for phase in trackedphases}
+ return revslen, phaseset
def _computephaserevspure(self, repo):
repo = repo.unfiltered()
cl = repo.changelog
- self._phasesets = [set() for phase in allphases]
+ self._phasesets = {phase: set() for phase in allphases}
lowerroots = set()
for phase in reversed(trackedphases):
roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -464,7 +473,7 @@
f.close()
def _write(self, fp):
- for phase, roots in enumerate(self.phaseroots):
+ for phase, roots in pycompat.iteritems(self.phaseroots):
for h in sorted(roots):
fp.write(b'%i %s\n' % (phase, hex(h)))
self.dirty = False
@@ -511,7 +520,7 @@
changes = set() # set of revisions to be changed
delroots = [] # set of root deleted by this path
- for phase in pycompat.xrange(targetphase + 1, len(allphases)):
+ for phase in (phase for phase in allphases if phase > targetphase):
# filter nodes that are not in a compatible phase already
nodes = [
n for n in nodes if self.phase(repo, repo[n].rev()) >= phase
@@ -546,7 +555,11 @@
return changes
def retractboundary(self, repo, tr, targetphase, nodes):
- oldroots = self.phaseroots[: targetphase + 1]
+ oldroots = {
+ phase: revs
+ for phase, revs in pycompat.iteritems(self.phaseroots)
+ if phase <= targetphase
+ }
if tr is None:
phasetracking = None
else:
@@ -565,7 +578,7 @@
# find the phase of the affected revision
for phase in pycompat.xrange(targetphase, -1, -1):
if phase:
- roots = oldroots[phase]
+ roots = oldroots.get(phase, [])
revs = set(repo.revs(b'%ln::%ld', roots, affected))
affected -= revs
else: # public phase
@@ -619,7 +632,7 @@
"""
filtered = False
has_node = repo.changelog.index.has_node # to filter unknown nodes
- for phase, nodes in enumerate(self.phaseroots):
+ for phase, nodes in pycompat.iteritems(self.phaseroots):
missing = sorted(node for node in nodes if not has_node(node))
if missing:
for mnode in missing:
@@ -744,7 +757,7 @@
"""
cl = repo.changelog
- headsbyphase = [[] for i in allphases]
+ headsbyphase = {i: [] for i in allphases}
# No need to keep track of secret phase; any heads in the subset that
# are not mentioned are implicitly secret.
for phase in allphases[:secret]:
@@ -759,6 +772,8 @@
#
# run the update (and fetch transaction) only if there are actually things
# to update. This avoid creating empty transaction during no-op operation.
+ #
+ # XXX This doesn't actually skip secret, but internal
for phase in allphases[:-1]:
revset = b'%ln - _phase(%s)'
@@ -875,18 +890,16 @@
"""
v = ui.config(b'phases', b'new-commit')
try:
- return phasenames.index(v)
- except ValueError:
- try:
- return int(v)
- except ValueError:
- msg = _(b"phases.new-commit: not a valid phase name ('%s')")
- raise error.ConfigError(msg % v)
+ return phasenumber2[v]
+ except KeyError:
+ raise error.ConfigError(
+ _(b"phases.new-commit: not a valid phase name ('%s')") % v
+ )
def hassecret(repo):
"""utility function that check if a repo have any secret changeset."""
- return bool(repo._phasecache.phaseroots[2])
+ return bool(repo._phasecache.phaseroots[secret])
def preparehookargs(node, old, new):
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -82,14 +82,14 @@
phases.registernew(repo, tr, phases.draft, csetres[b'added'])
# And adjust the phase of all changesets accordingly.
- for phase in phases.phasenames:
+ for phase in phases.phasenames.values():
if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
continue
phases.advanceboundary(
repo,
tr,
- phases.phasenames.index(phase),
+ phases.phasenumber[phase],
csetres[b'nodesbyphase'][phase],
)
@@ -361,7 +361,7 @@
# so we can set the linkrev accordingly when manifests are added.
manifestnodes[cl.rev(node)] = revision.manifest
- nodesbyphase = {phase: set() for phase in phases.phasenames}
+ nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
remotebookmarks = {}
# addgroup() expects a 7-tuple describing revisions. This normalizes
diff --git a/mercurial/exchange.py b/mercurial/exchange.py
--- a/mercurial/exchange.py
+++ b/mercurial/exchange.py
@@ -1024,12 +1024,12 @@
hasphaseheads = b'heads' in b2caps.get(b'phases', ())
if pushop.remotephases is not None and hasphaseheads:
# check that the remote phase has not changed
- checks = [[] for p in phases.allphases]
+ checks = {p: [] for p in phases.allphases}
checks[phases.public].extend(pushop.remotephases.publicheads)
checks[phases.draft].extend(pushop.remotephases.draftroots)
- if any(checks):
- for nodes in checks:
- nodes.sort()
+ if any(pycompat.itervalues(checks)):
+ for phase in checks:
+ checks[phase].sort()
checkdata = phases.binaryencode(checks)
bundler.newpart(b'check:phases', data=checkdata)
@@ -1104,7 +1104,7 @@
"""push phase information through a bundle2 - binary part"""
pushop.stepsdone.add(b'phases')
if pushop.outdatedphases:
- updates = [[] for p in phases.allphases]
+ updates = {p: [] for p in phases.allphases}
updates[0].extend(h.node() for h in pushop.outdatedphases)
phasedata = phases.binaryencode(updates)
bundler.newpart(b'phase-heads', data=phasedata)
@@ -2658,9 +2658,9 @@
headsbyphase[phases.public].add(node(r))
# transform data in a format used by the encoding function
- phasemapping = []
- for phase in phases.allphases:
- phasemapping.append(sorted(headsbyphase[phase]))
+ phasemapping = {
+ phase: sorted(headsbyphase[phase]) for phase in phases.allphases
+ }
# generate the actual part
phasedata = phases.binaryencode(phasemapping)
diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
--- a/mercurial/bundle2.py
+++ b/mercurial/bundle2.py
@@ -2207,7 +2207,7 @@
b'remote repository changed while pushing - please try again '
b'(%s is %s expected %s)'
)
- for expectedphase, nodes in enumerate(phasetonodes):
+ for expectedphase, nodes in pycompat.iteritems(phasetonodes):
for n in nodes:
actualphase = phasecache.phase(unfi, cl.rev(n))
if actualphase != expectedphase:
To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20200624/ff163015/attachment-0001.html>
More information about the Mercurial-patches
mailing list