[Request] [+-- ] D8654: phases: make phase list dense or dictionaries [PoC]

joerg.sonnenberger (Joerg Sonnenberger) phabricator at mercurial-scm.org
Wed Jun 24 00:27:41 UTC 2020


joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  When the internal and archived phase was added, allphases became sparse
  populated. This dramatically increased the number of lookup operations
  for public revisions when they hit phasecache.phase as it it is
  iterationing over all phases in the phasesets. Fix this by making the
  various data structures either sparse lists or dictionaries, depending
  on the purpose. This improves a no-change "hg up" for my NetBSD test
  repository from 4s to 1.3s.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8654

AFFECTED FILES
  mercurial/bundle2.py
  mercurial/exchange.py
  mercurial/exchangev2.py
  mercurial/phases.py
  mercurial/repoview.py

CHANGE DETAILS

diff --git a/mercurial/repoview.py b/mercurial/repoview.py
--- a/mercurial/repoview.py
+++ b/mercurial/repoview.py
@@ -129,7 +129,7 @@
 def computemutable(repo, visibilityexceptions=None):
     assert not repo.changelog.filteredrevs
     # fast check to avoid revset call on huge repo
-    if any(repo._phasecache.phaseroots[1:]):
+    if any(pycompat.itervalues(repo._phasecache.trackedphaseroots())):
         getphase = repo._phasecache.phase
         maymutable = filterrevs(repo, b'base')
         return frozenset(r for r in maymutable if getphase(repo, r))
@@ -154,7 +154,7 @@
     assert not repo.changelog.filteredrevs
     cl = repo.changelog
     firstmutable = len(cl)
-    for roots in repo._phasecache.phaseroots[1:]:
+    for roots in pycompat.itervalues(repo._phasecache.trackedphaseroots()):
         if roots:
             firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
     # protect from nullrev root
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -128,25 +128,25 @@
 
 _fphasesentry = struct.Struct(b'>i20s')
 
-INTERNAL_FLAG = 64  # Phases for mercurial internal usage only
-HIDEABLE_FLAG = 32  # Phases that are hideable
-
 # record phase index
 public, draft, secret = range(3)
-internal = INTERNAL_FLAG | HIDEABLE_FLAG
-archived = HIDEABLE_FLAG
-allphases = list(range(internal + 1))
-trackedphases = allphases[1:]
+internal = 96
+archived = 32
+allphases = [public, draft, secret, archived, internal]
+trackedphases = allphases[draft:]
 # record phase names
 cmdphasenames = [b'public', b'draft', b'secret']  # known to `hg phase` command
-phasenames = [None] * len(allphases)
-phasenames[: len(cmdphasenames)] = cmdphasenames
+phasenames = dict(enumerate(cmdphasenames))
 phasenames[archived] = b'archived'
 phasenames[internal] = b'internal'
+phasenumber = {name: phase for phase, name in phasenames.items()}
+phasenumber2 = phasenumber.copy()
+phasenumber2.update({phase: phase for phase in phasenames})
+phasenumber2.update({b'%i' % phase: phase for phase in phasenames})
 # record phase property
 mutablephases = tuple(allphases[1:])
 remotehiddenphases = tuple(allphases[2:])
-localhiddenphases = tuple(p for p in allphases if p & HIDEABLE_FLAG)
+localhiddenphases = (internal, archived)
 
 
 def supportinternal(repo):
@@ -167,7 +167,7 @@
     """
     repo = repo.unfiltered()
     dirty = False
-    roots = [set() for i in allphases]
+    roots = {i: set() for i in allphases}
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
         try:
@@ -193,7 +193,7 @@
     [[PUBLIC_HEADS], [DRAFTS_HEADS], [SECRET_HEADS]]
     """
     binarydata = []
-    for phase, nodes in enumerate(phasemapping):
+    for phase, nodes in pycompat.iteritems(phasemapping):
         for head in nodes:
             binarydata.append(_fphasesentry.pack(phase, head))
     return b''.join(binarydata)
@@ -203,7 +203,7 @@
     """decode a binary stream into a 'phase -> nodes' mapping
 
     Since phases are integer the mapping is actually a python list."""
-    headsbyphase = [[] for i in allphases]
+    headsbyphase = {i: [] for i in allphases}
     entrysize = _fphasesentry.size
     while True:
         entry = stream.read(entrysize)
@@ -323,6 +323,13 @@
             self.filterunknown(repo)
             self.opener = repo.svfs
 
+    def trackedphaseroots(self):
+        return {
+            phase: revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase != public
+        }
+
     def getrevset(self, repo, phases, subset=None):
         """return a smartset for the given phases"""
         self.loadphaserevs(repo)  # ensure phase's sets are loaded
@@ -380,7 +387,7 @@
         # Shallow copy meant to ensure isolation in
         # advance/retractboundary(), nothing more.
         ph = self.__class__(None, None, _load=False)
-        ph.phaseroots = self.phaseroots[:]
+        ph.phaseroots = self.phaseroots.copy()
         ph.dirty = self.dirty
         ph.opener = self.opener
         ph._loadedrevslen = self._loadedrevslen
@@ -400,17 +407,19 @@
 
     def _getphaserevsnative(self, repo):
         repo = repo.unfiltered()
-        nativeroots = []
+        nativeroots = [[] for phase in range(internal)]
         for phase in trackedphases:
-            nativeroots.append(
-                pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
+            nativeroots[phase - 1] = pycompat.maplist(
+                repo.changelog.rev, self.phaseroots[phase]
             )
-        return repo.changelog.computephases(nativeroots)
+        revslen, phaseset = repo.changelog.computephases(nativeroots)
+        phaseset = {phase: phaseset[phase] or set() for phase in trackedphases}
+        return revslen, phaseset
 
     def _computephaserevspure(self, repo):
         repo = repo.unfiltered()
         cl = repo.changelog
-        self._phasesets = [set() for phase in allphases]
+        self._phasesets = {phase: set() for phase in allphases}
         lowerroots = set()
         for phase in reversed(trackedphases):
             roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -464,7 +473,7 @@
             f.close()
 
     def _write(self, fp):
-        for phase, roots in enumerate(self.phaseroots):
+        for phase, roots in pycompat.iteritems(self.phaseroots):
             for h in sorted(roots):
                 fp.write(b'%i %s\n' % (phase, hex(h)))
         self.dirty = False
@@ -511,7 +520,7 @@
 
         changes = set()  # set of revisions to be changed
         delroots = []  # set of root deleted by this path
-        for phase in pycompat.xrange(targetphase + 1, len(allphases)):
+        for phase in (phase for phase in allphases if phase > targetphase):
             # filter nodes that are not in a compatible phase already
             nodes = [
                 n for n in nodes if self.phase(repo, repo[n].rev()) >= phase
@@ -546,7 +555,11 @@
         return changes
 
     def retractboundary(self, repo, tr, targetphase, nodes):
-        oldroots = self.phaseroots[: targetphase + 1]
+        oldroots = {
+            phase: revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase <= targetphase
+        }
         if tr is None:
             phasetracking = None
         else:
@@ -565,7 +578,7 @@
             # find the phase of the affected revision
             for phase in pycompat.xrange(targetphase, -1, -1):
                 if phase:
-                    roots = oldroots[phase]
+                    roots = oldroots.get(phase, [])
                     revs = set(repo.revs(b'%ln::%ld', roots, affected))
                     affected -= revs
                 else:  # public phase
@@ -619,7 +632,7 @@
         """
         filtered = False
         has_node = repo.changelog.index.has_node  # to filter unknown nodes
-        for phase, nodes in enumerate(self.phaseroots):
+        for phase, nodes in pycompat.iteritems(self.phaseroots):
             missing = sorted(node for node in nodes if not has_node(node))
             if missing:
                 for mnode in missing:
@@ -744,7 +757,7 @@
     """
     cl = repo.changelog
 
-    headsbyphase = [[] for i in allphases]
+    headsbyphase = {i: [] for i in allphases}
     # No need to keep track of secret phase; any heads in the subset that
     # are not mentioned are implicitly secret.
     for phase in allphases[:secret]:
@@ -759,6 +772,8 @@
     #
     # run the update (and fetch transaction) only if there are actually things
     # to update. This avoid creating empty transaction during no-op operation.
+    #
+    # XXX This doesn't actually skip secret, but internal
 
     for phase in allphases[:-1]:
         revset = b'%ln - _phase(%s)'
@@ -875,18 +890,16 @@
     """
     v = ui.config(b'phases', b'new-commit')
     try:
-        return phasenames.index(v)
-    except ValueError:
-        try:
-            return int(v)
-        except ValueError:
-            msg = _(b"phases.new-commit: not a valid phase name ('%s')")
-            raise error.ConfigError(msg % v)
+        return phasenumber2[v]
+    except KeyError:
+        raise error.ConfigError(
+            _(b"phases.new-commit: not a valid phase name ('%s')") % v
+        )
 
 
 def hassecret(repo):
     """utility function that check if a repo have any secret changeset."""
-    return bool(repo._phasecache.phaseroots[2])
+    return bool(repo._phasecache.phaseroots[secret])
 
 
 def preparehookargs(node, old, new):
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -82,14 +82,14 @@
         phases.registernew(repo, tr, phases.draft, csetres[b'added'])
 
     # And adjust the phase of all changesets accordingly.
-    for phase in phases.phasenames:
+    for phase in phases.phasenames.values():
         if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
             continue
 
         phases.advanceboundary(
             repo,
             tr,
-            phases.phasenames.index(phase),
+            phases.phasenumber[phase],
             csetres[b'nodesbyphase'][phase],
         )
 
@@ -361,7 +361,7 @@
         # so we can set the linkrev accordingly when manifests are added.
         manifestnodes[cl.rev(node)] = revision.manifest
 
-    nodesbyphase = {phase: set() for phase in phases.phasenames}
+    nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
     remotebookmarks = {}
 
     # addgroup() expects a 7-tuple describing revisions. This normalizes
diff --git a/mercurial/exchange.py b/mercurial/exchange.py
--- a/mercurial/exchange.py
+++ b/mercurial/exchange.py
@@ -1024,12 +1024,12 @@
     hasphaseheads = b'heads' in b2caps.get(b'phases', ())
     if pushop.remotephases is not None and hasphaseheads:
         # check that the remote phase has not changed
-        checks = [[] for p in phases.allphases]
+        checks = {p: [] for p in phases.allphases}
         checks[phases.public].extend(pushop.remotephases.publicheads)
         checks[phases.draft].extend(pushop.remotephases.draftroots)
-        if any(checks):
-            for nodes in checks:
-                nodes.sort()
+        if any(pycompat.itervalues(checks)):
+            for phase in checks:
+                checks[phase].sort()
             checkdata = phases.binaryencode(checks)
             bundler.newpart(b'check:phases', data=checkdata)
 
@@ -1104,7 +1104,7 @@
     """push phase information through a bundle2 - binary part"""
     pushop.stepsdone.add(b'phases')
     if pushop.outdatedphases:
-        updates = [[] for p in phases.allphases]
+        updates = {p: [] for p in phases.allphases}
         updates[0].extend(h.node() for h in pushop.outdatedphases)
         phasedata = phases.binaryencode(updates)
         bundler.newpart(b'phase-heads', data=phasedata)
@@ -2658,9 +2658,9 @@
                     headsbyphase[phases.public].add(node(r))
 
         # transform data in a format used by the encoding function
-        phasemapping = []
-        for phase in phases.allphases:
-            phasemapping.append(sorted(headsbyphase[phase]))
+        phasemapping = {
+            phase: sorted(headsbyphase[phase]) for phase in phases.allphases
+        }
 
         # generate the actual part
         phasedata = phases.binaryencode(phasemapping)
diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
--- a/mercurial/bundle2.py
+++ b/mercurial/bundle2.py
@@ -2207,7 +2207,7 @@
         b'remote repository changed while pushing - please try again '
         b'(%s is %s expected %s)'
     )
-    for expectedphase, nodes in enumerate(phasetonodes):
+    for expectedphase, nodes in pycompat.iteritems(phasetonodes):
         for n in nodes:
             actualphase = phasecache.phase(unfi, cl.rev(n))
             if actualphase != expectedphase:



To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20200624/ff163015/attachment-0001.html>


More information about the Mercurial-patches mailing list