D6259: revset: on-disk cache for children queries

joerg.sonnenberger (Joerg Sonnenberger) phabricator at mercurial-scm.org
Mon Jul 13 01:54:32 UTC 2020


joerg.sonnenberger retitled this revision from "[POC] revset: on-disk cache for children queries" to "revset: on-disk cache for children queries".
joerg.sonnenberger updated this revision to Diff 21871.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6259?vs=14789&id=21871

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6259/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6259

AFFECTED FILES
  mercurial/localrepo.py
  mercurial/revset.py
  tests/test-clone-uncompressed.t
  tests/test-clone.t
  tests/test-debugcommands.t
  tests/test-hardlinks.t
  tests/test-remote-hidden.t
  tests/test-server-view.t

CHANGE DETAILS

diff --git a/tests/test-server-view.t b/tests/test-server-view.t
--- a/tests/test-server-view.t
+++ b/tests/test-server-view.t
@@ -56,6 +56,7 @@
   branch2-served.hidden%89c45d2fa07e
   branch2-visible%89c45d2fa07e
   branch2-visible-hidden%89c45d2fa07e
+  childrencache
   hgtagsfnodes1
   rbc-names-v1
   rbc-revs-v1
diff --git a/tests/test-remote-hidden.t b/tests/test-remote-hidden.t
--- a/tests/test-remote-hidden.t
+++ b/tests/test-remote-hidden.t
@@ -83,6 +83,7 @@
   branch2-served
   branch2-served.hidden
   branch2-visible
+  childrencache
   rbc-names-v1
   rbc-revs-v1
   tags2
diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
--- a/tests/test-hardlinks.t
+++ b/tests/test-hardlinks.t
@@ -239,6 +239,7 @@
   2 r4/.hg/branch
   2 r4/.hg/cache/branch2-base
   2 r4/.hg/cache/branch2-served
+  2 r4/.hg/cache/childrencache
   2 r4/.hg/cache/rbc-names-v1
   2 r4/.hg/cache/rbc-revs-v1
   2 r4/.hg/dirstate
@@ -290,6 +291,7 @@
   1 r4/.hg/branch
   2 r4/.hg/cache/branch2-base
   2 r4/.hg/cache/branch2-served
+  2 r4/.hg/cache/childrencache
   2 r4/.hg/cache/rbc-names-v1
   2 r4/.hg/cache/rbc-revs-v1
   1 r4/.hg/dirstate
diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
--- a/tests/test-debugcommands.t
+++ b/tests/test-debugcommands.t
@@ -546,6 +546,7 @@
   .hg/cache/rbc-revs-v1
   .hg/cache/rbc-names-v1
   .hg/cache/hgtagsfnodes1
+  .hg/cache/childrencache
   .hg/cache/branch2-visible-hidden
   .hg/cache/branch2-visible
   .hg/cache/branch2-served.hidden
diff --git a/tests/test-clone.t b/tests/test-clone.t
--- a/tests/test-clone.t
+++ b/tests/test-clone.t
@@ -43,6 +43,7 @@
   default                       10:a7949464abda
   $ ls .hg/cache
   branch2-served
+  childrencache
   rbc-names-v1
   rbc-revs-v1
 
diff --git a/tests/test-clone-uncompressed.t b/tests/test-clone-uncompressed.t
--- a/tests/test-clone-uncompressed.t
+++ b/tests/test-clone-uncompressed.t
@@ -193,6 +193,7 @@
 
   $ ls -1 clone1/.hg/cache
   branch2-served
+  childrencache
   rbc-names-v1
   rbc-revs-v1
 #endif
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -492,14 +492,14 @@
     cs = set()
     for r in getset(repo, fullreposet(repo), x):
         for i in range(n):
-            c = repo[r].children()
+            c = repo._childrencache.children(r)
             if len(c) == 0:
                 break
             if len(c) > 1:
                 raise error.RepoLookupError(
-                    _(b"revision in set has more than one child")
+                    _("revision in set has more than one child")
                 )
-            r = c[0].rev()
+            r = list(c)[0]
         else:
             cs.add(r)
     return subset & cs
@@ -719,21 +719,17 @@
 
 
 def _children(repo, subset, parentset):
+    def visiblerev(rev):
+        try:
+            repo.changelog.node(rev)
+            return True
+        except IndexError:
+            return False
+
     if not parentset:
         return baseset()
-    cs = set()
-    pr = repo.changelog.parentrevs
-    minrev = parentset.min()
-    nullrev = node.nullrev
-    for r in subset:
-        if r <= minrev:
-            continue
-        p1, p2 = pr(r)
-        if p1 in parentset:
-            cs.add(r)
-        if p2 != nullrev and p2 in parentset:
-            cs.add(r)
-    return baseset(cs)
+    cs = set.union(*[set(repo._childrencache.children(c)) for c in parentset])
+    return baseset({rev for rev in cs if visiblerev(rev)})
 
 
 @predicate(b'children(set)', safe=True)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -10,6 +10,7 @@
 import errno
 import os
 import random
+import struct
 import sys
 import time
 import weakref
@@ -1178,6 +1179,7 @@
 
         self._branchcaches = branchmap.BranchMapCache()
         self._revbranchcache = None
+        self.__childrencache = None
         self._filterpats = {}
         self._datafilters = {}
         self._transref = self._lockref = self._wlockref = None
@@ -1288,6 +1290,8 @@
     def _writecaches(self):
         if self._revbranchcache:
             self._revbranchcache.write()
+        if self.__childrencache:
+            self.__childrencache.write()
 
     def _restrictcapabilities(self, caps):
         if self.ui.configbool(b'experimental', b'bundle2-advertise'):
@@ -1471,6 +1475,16 @@
     def manifestlog(self):
         return self.store.manifestlog(self, self._storenarrowmatch)
 
+        rootstore = manifest.manifestrevlog(self.svfs)
+        return manifest.manifestlog(
+            self.svfs, self, rootstore, self._storenarrowmatch
+        )
+
+    @unfilteredpropertycache
+    def _childrencache(self):
+        self.__childrencache = childrencache(self)
+        return self.__childrencache
+
     @repofilecache(b'dirstate')
     def dirstate(self):
         return self._makedirstate()
@@ -2535,6 +2549,8 @@
             # ensure the working copy parents are in the manifestfulltextcache
             for ctx in self[b'.'].parents():
                 ctx.manifest()  # accessing the manifest is enough
+            self._childrencache.clear()
+            self._childrencache.write()
 
             # accessing fnode cache warms the cache
             tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
@@ -2551,6 +2567,9 @@
             for filt in repoview.filtertable.keys():
                 filtered = self.filtered(filt)
                 filtered.branchmap().write(filtered)
+        else:
+            if self.__childrencache:
+                self.__childrencache.write()
 
     def invalidatecaches(self):
 
@@ -2561,6 +2580,7 @@
         self._branchcaches.clear()
         self.invalidatevolatilesets()
         self._sparsesignaturecache.clear()
+        self._childrencache.clear()
 
     def invalidatevolatilesets(self):
         self.filteredrevcache.clear()
@@ -3809,3 +3829,123 @@
     # We may have a repoview, which intercepts __setattr__. So be sure
     # we operate at the lowest level possible.
     object.__setattr__(repo, '__class__', poisonedrepository)
+
+
+class childrencache(object):
+    """Persistent cache, mapping from revision number to revision numbers of
+    the direct children. This is a low level cache, independent of filtering.
+    """
+
+    _filename = b'childrencache'
+
+    def __init__(self, repo, readonly=False):
+        assert repo.filtername is None
+        self._data = None
+        self._new_data = {}
+        self._read = False
+        self._repo = repo.unfiltered()
+        self._readonly = readonly
+
+    def children(self, rev):
+        if not self._read:
+            self.read()
+        if rev >= self._lastrecorded:
+            self._update_changes(self._lastrecorded)
+        children = self._new_data.get(rev, set())
+        rev += 1
+        if rev >= self._lastknown:
+            return children
+        children = children.copy()
+        child_len, child_off = struct.unpack_from('>ll', self._data, 8 * rev)
+        if child_len == 1:
+            children.add(child_off)
+        elif child_len > 1:
+            start = 8 * self._lastknown + child_off * 4
+            if len(self._data) < start + child_len * 4:
+                raise error.Abort(b'Truncated children cache')
+            for i in pycompat.xrange(child_len):
+                (child,) = struct.unpack_from('>l', self._data, start)
+                children.add(child)
+                start += 4
+        return children
+
+    def clear(self):
+        self._read = True
+        self._data = None
+        self._new_data = {}
+        self._lastknown = 0
+        self._update_changes(0)
+
+    def _update_changes(self, start):
+        idx = self._repo.changelog.index
+        lastrev = len(self._repo)
+        for r in pycompat.xrange(start, lastrev):
+            rev = idx[r]
+            p1, p2 = rev[5:7]
+            if p1 != nullrev or p2 == nullrev:
+                self._new_data.setdefault(p1, set()).add(r)
+            if p2 != nullrev:
+                self._new_data.setdefault(p2, set()).add(r)
+        self._lastrecorded = lastrev
+
+    def read(self):
+        if self._read:
+            return
+        self._read = True
+        try:
+            data = self._repo.cachevfs.read(self._filename)
+        except (OSError, IOError):
+            data = ""
+        if len(data) < 24:
+            self._repo.ui.debug(
+                b'children cache missing or too short, ignored\n'
+            )
+            self._lastknown = 0
+            self._update_changes(0)
+            return
+        tip, lastrev = struct.unpack_from('>20sl', data, 0)
+        if lastrev == 0:
+            self._repo.ui.debug(b'children cache empty, ignored\n')
+            self._lastknown = 0
+            self._update_changes(0)
+            return
+        if (
+            lastrev > len(self._repo) + 1
+            or self._repo.changelog.node(lastrev - 2) != tip
+        ):
+            self._lastknown = 0
+            self._update_changes(0)
+            self._repo.ui.debug(
+                b'children cache does not match current repository state, ignored\n'
+            )
+            return
+        data = data[24:]
+        if lastrev * 8 > len(data):
+            self._lastknown = 0
+            self._update_changes(0)
+            self._repo.ui.debug(b'children cache was truncated, ignored\n')
+            return
+        self._data = data
+        self._lastknown = lastrev
+        self._update_changes(lastrev)
+
+    def write(self):
+        if not self._new_data or self._readonly:
+            return
+        lastrev = len(self._repo) + 1
+        data = [struct.pack('>20sl', self._repo.changelog.tip(), lastrev)]
+        data2 = []
+        for r in pycompat.xrange(lastrev):
+            children = self.children(r - 1)
+            data.append(struct.pack('>l', len(children)))
+            if len(children) == 0:
+                data.append(struct.pack('>l', 0))
+            elif len(children) == 1:
+                children = list(children)
+                data.append(struct.pack('>l', children[0]))
+            else:
+                data.append(struct.pack('>l', len(data2)))
+                for c in children:
+                    data2.append(struct.pack('>l', c))
+        output = b''.join(data) + b''.join(data2)
+        self._repo.cachevfs.write(self._filename, output, atomictemp=True)



To: joerg.sonnenberger, #hg-reviewers, marmoute
Cc: mercurial-patches, marmoute, martinvonz, indygreg, mercurial-devel


More information about the Mercurial-devel mailing list