D6259: revset: on-disk cache for children queries
joerg.sonnenberger (Joerg Sonnenberger)
phabricator at mercurial-scm.org
Mon Jul 13 01:54:32 UTC 2020
joerg.sonnenberger retitled this revision from "[POC] revset: on-disk cache for children queries" to "revset: on-disk cache for children queries".
joerg.sonnenberger updated this revision to Diff 21871.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D6259?vs=14789&id=21871
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D6259/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D6259
AFFECTED FILES
mercurial/localrepo.py
mercurial/revset.py
tests/test-clone-uncompressed.t
tests/test-clone.t
tests/test-debugcommands.t
tests/test-hardlinks.t
tests/test-remote-hidden.t
tests/test-server-view.t
CHANGE DETAILS
diff --git a/tests/test-server-view.t b/tests/test-server-view.t
--- a/tests/test-server-view.t
+++ b/tests/test-server-view.t
@@ -56,6 +56,7 @@
branch2-served.hidden%89c45d2fa07e
branch2-visible%89c45d2fa07e
branch2-visible-hidden%89c45d2fa07e
+ childrencache
hgtagsfnodes1
rbc-names-v1
rbc-revs-v1
diff --git a/tests/test-remote-hidden.t b/tests/test-remote-hidden.t
--- a/tests/test-remote-hidden.t
+++ b/tests/test-remote-hidden.t
@@ -83,6 +83,7 @@
branch2-served
branch2-served.hidden
branch2-visible
+ childrencache
rbc-names-v1
rbc-revs-v1
tags2
diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
--- a/tests/test-hardlinks.t
+++ b/tests/test-hardlinks.t
@@ -239,6 +239,7 @@
2 r4/.hg/branch
2 r4/.hg/cache/branch2-base
2 r4/.hg/cache/branch2-served
+ 2 r4/.hg/cache/childrencache
2 r4/.hg/cache/rbc-names-v1
2 r4/.hg/cache/rbc-revs-v1
2 r4/.hg/dirstate
@@ -290,6 +291,7 @@
1 r4/.hg/branch
2 r4/.hg/cache/branch2-base
2 r4/.hg/cache/branch2-served
+ 2 r4/.hg/cache/childrencache
2 r4/.hg/cache/rbc-names-v1
2 r4/.hg/cache/rbc-revs-v1
1 r4/.hg/dirstate
diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
--- a/tests/test-debugcommands.t
+++ b/tests/test-debugcommands.t
@@ -546,6 +546,7 @@
.hg/cache/rbc-revs-v1
.hg/cache/rbc-names-v1
.hg/cache/hgtagsfnodes1
+ .hg/cache/childrencache
.hg/cache/branch2-visible-hidden
.hg/cache/branch2-visible
.hg/cache/branch2-served.hidden
diff --git a/tests/test-clone.t b/tests/test-clone.t
--- a/tests/test-clone.t
+++ b/tests/test-clone.t
@@ -43,6 +43,7 @@
default 10:a7949464abda
$ ls .hg/cache
branch2-served
+ childrencache
rbc-names-v1
rbc-revs-v1
diff --git a/tests/test-clone-uncompressed.t b/tests/test-clone-uncompressed.t
--- a/tests/test-clone-uncompressed.t
+++ b/tests/test-clone-uncompressed.t
@@ -193,6 +193,7 @@
$ ls -1 clone1/.hg/cache
branch2-served
+ childrencache
rbc-names-v1
rbc-revs-v1
#endif
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -492,14 +492,14 @@
cs = set()
for r in getset(repo, fullreposet(repo), x):
for i in range(n):
- c = repo[r].children()
+ c = repo._childrencache.children(r)
if len(c) == 0:
break
if len(c) > 1:
raise error.RepoLookupError(
- _(b"revision in set has more than one child")
+ _("revision in set has more than one child")
)
- r = c[0].rev()
+ r = list(c)[0]
else:
cs.add(r)
return subset & cs
@@ -719,21 +719,17 @@
def _children(repo, subset, parentset):
+ def visiblerev(rev):
+ try:
+ repo.changelog.node(rev)
+ return True
+ except IndexError:
+ return False
+
if not parentset:
return baseset()
- cs = set()
- pr = repo.changelog.parentrevs
- minrev = parentset.min()
- nullrev = node.nullrev
- for r in subset:
- if r <= minrev:
- continue
- p1, p2 = pr(r)
- if p1 in parentset:
- cs.add(r)
- if p2 != nullrev and p2 in parentset:
- cs.add(r)
- return baseset(cs)
+ cs = set.union(*[set(repo._childrencache.children(c)) for c in parentset])
+ return baseset({rev for rev in cs if visiblerev(rev)})
@predicate(b'children(set)', safe=True)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -10,6 +10,7 @@
import errno
import os
import random
+import struct
import sys
import time
import weakref
@@ -1178,6 +1179,7 @@
self._branchcaches = branchmap.BranchMapCache()
self._revbranchcache = None
+ self.__childrencache = None
self._filterpats = {}
self._datafilters = {}
self._transref = self._lockref = self._wlockref = None
@@ -1288,6 +1290,8 @@
def _writecaches(self):
if self._revbranchcache:
self._revbranchcache.write()
+ if self.__childrencache:
+ self.__childrencache.write()
def _restrictcapabilities(self, caps):
if self.ui.configbool(b'experimental', b'bundle2-advertise'):
@@ -1471,6 +1475,16 @@
def manifestlog(self):
return self.store.manifestlog(self, self._storenarrowmatch)
+ rootstore = manifest.manifestrevlog(self.svfs)
+ return manifest.manifestlog(
+ self.svfs, self, rootstore, self._storenarrowmatch
+ )
+
+ @unfilteredpropertycache
+ def _childrencache(self):
+ self.__childrencache = childrencache(self)
+ return self.__childrencache
+
@repofilecache(b'dirstate')
def dirstate(self):
return self._makedirstate()
@@ -2535,6 +2549,8 @@
# ensure the working copy parents are in the manifestfulltextcache
for ctx in self[b'.'].parents():
ctx.manifest() # accessing the manifest is enough
+ self._childrencache.clear()
+ self._childrencache.write()
# accessing fnode cache warms the cache
tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
@@ -2551,6 +2567,9 @@
for filt in repoview.filtertable.keys():
filtered = self.filtered(filt)
filtered.branchmap().write(filtered)
+ else:
+ if self.__childrencache:
+ self.__childrencache.write()
def invalidatecaches(self):
@@ -2561,6 +2580,7 @@
self._branchcaches.clear()
self.invalidatevolatilesets()
self._sparsesignaturecache.clear()
+ self._childrencache.clear()
def invalidatevolatilesets(self):
self.filteredrevcache.clear()
@@ -3809,3 +3829,123 @@
# We may have a repoview, which intercepts __setattr__. So be sure
# we operate at the lowest level possible.
object.__setattr__(repo, '__class__', poisonedrepository)
+
+
+class childrencache(object):
+ """Persistent cache, mapping from revision number to revision numbers of
+ the direct children. This is a low level cache, independent of filtering.
+ """
+
+ _filename = b'childrencache'
+
+ def __init__(self, repo, readonly=False):
+ assert repo.filtername is None
+ self._data = None
+ self._new_data = {}
+ self._read = False
+ self._repo = repo.unfiltered()
+ self._readonly = readonly
+
+ def children(self, rev):
+ if not self._read:
+ self.read()
+ if rev >= self._lastrecorded:
+ self._update_changes(self._lastrecorded)
+ children = self._new_data.get(rev, set())
+ rev += 1
+ if rev >= self._lastknown:
+ return children
+ children = children.copy()
+ child_len, child_off = struct.unpack_from('>ll', self._data, 8 * rev)
+ if child_len == 1:
+ children.add(child_off)
+ elif child_len > 1:
+ start = 8 * self._lastknown + child_off * 4
+ if len(self._data) < start + child_len * 4:
+ raise error.Abort(b'Truncated children cache')
+ for i in pycompat.xrange(child_len):
+ (child,) = struct.unpack_from('>l', self._data, start)
+ children.add(child)
+ start += 4
+ return children
+
+ def clear(self):
+ self._read = True
+ self._data = None
+ self._new_data = {}
+ self._lastknown = 0
+ self._update_changes(0)
+
+ def _update_changes(self, start):
+ idx = self._repo.changelog.index
+ lastrev = len(self._repo)
+ for r in pycompat.xrange(start, lastrev):
+ rev = idx[r]
+ p1, p2 = rev[5:7]
+ if p1 != nullrev or p2 == nullrev:
+ self._new_data.setdefault(p1, set()).add(r)
+ if p2 != nullrev:
+ self._new_data.setdefault(p2, set()).add(r)
+ self._lastrecorded = lastrev
+
+ def read(self):
+ if self._read:
+ return
+ self._read = True
+ try:
+ data = self._repo.cachevfs.read(self._filename)
+ except (OSError, IOError):
+ data = ""
+ if len(data) < 24:
+ self._repo.ui.debug(
+ b'children cache missing or too short, ignored\n'
+ )
+ self._lastknown = 0
+ self._update_changes(0)
+ return
+ tip, lastrev = struct.unpack_from('>20sl', data, 0)
+ if lastrev == 0:
+ self._repo.ui.debug(b'children cache empty, ignored\n')
+ self._lastknown = 0
+ self._update_changes(0)
+ return
+ if (
+ lastrev > len(self._repo) + 1
+ or self._repo.changelog.node(lastrev - 2) != tip
+ ):
+ self._lastknown = 0
+ self._update_changes(0)
+ self._repo.ui.debug(
+ b'children cache does not match current repository state, ignored\n'
+ )
+ return
+ data = data[24:]
+ if lastrev * 8 > len(data):
+ self._lastknown = 0
+ self._update_changes(0)
+ self._repo.ui.debug(b'children cache was truncated, ignored\n')
+ return
+ self._data = data
+ self._lastknown = lastrev
+ self._update_changes(lastrev)
+
+ def write(self):
+ if not self._new_data or self._readonly:
+ return
+ lastrev = len(self._repo) + 1
+ data = [struct.pack('>20sl', self._repo.changelog.tip(), lastrev)]
+ data2 = []
+ for r in pycompat.xrange(lastrev):
+ children = self.children(r - 1)
+ data.append(struct.pack('>l', len(children)))
+ if len(children) == 0:
+ data.append(struct.pack('>l', 0))
+ elif len(children) == 1:
+ children = list(children)
+ data.append(struct.pack('>l', children[0]))
+ else:
+ data.append(struct.pack('>l', len(data2)))
+ for c in children:
+ data2.append(struct.pack('>l', c))
+ output = b''.join(data) + b''.join(data2)
+ self._repo.cachevfs.write(self._filename, output, atomictemp=True)
To: joerg.sonnenberger, #hg-reviewers, marmoute
Cc: mercurial-patches, marmoute, martinvonz, indygreg, mercurial-devel
More information about the Mercurial-devel
mailing list