[PATCH 1 of 2 v2] localrepo: persistent caching of branch names
Augie Fackler
raf at durin42.com
Wed Oct 15 18:35:45 UTC 2014
On Wed, Oct 15, 2014 at 08:14:22PM +0200, Mads Kiilerich wrote:
> # HG changeset patch
> # User Mads Kiilerich <madski at unity3d.com>
> # Date 1413396806 -7200
> # Wed Oct 15 20:13:26 2014 +0200
> # Node ID bf7a0169677c0545a63e64690b0e49e50b376703
> # Parent 48c0b101a9de1fdbd638daa858da845cd05a6be7
> localrepo: persistent caching of branch names
>
> It is expensive to retrieve the branch name. Very expensive when creating a
> changectx and calling .branch() - slightly less when using
> changelog.branchinfo().
>
> Now, to really speed things up, cache the results on disk. For each repo
> revision store the node hash and a reference to the branch name. To avoid using
> too much space, each branch name is only stored once. To make it 100% stable
> against repository mutations, always check the node hash before using the
> cache content.
>
> This change promises to speed some operations up 4-6 times when it is
> actually used.
>
> A simpler approach that didn't store and validate node hashes for every
> revision had a 20x speedup but could be tricked when modifying history. It
> would usually reset the cache, but someone trying very hard could trick it
> into not noticing changes.
>
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -6,7 +6,7 @@
> # GNU General Public License version 2 or any later version.
> from node import hex, nullid, short
> from i18n import _
> -import urllib
> +import urllib, struct, array
> import peer, changegroup, subrepo, pushkey, obsolete, repoview
> import changelog, dirstate, filelog, manifest, context, bookmarks, phases
> import lock as lockmod
> @@ -21,6 +21,14 @@ import branchmap, pathutil
> propertycache = util.propertycache
> filecache = scmutil.filecache
>
> +# branch name caching
> +bcfilename = 'cache/branchnames'
> +bcversion = 2345164374
> +bcheadfmt = '>LLL'
> +bcheadsize = struct.calcsize(bcheadfmt)
> +bcrecfmt = '>20sH'
> +bcrecsize = struct.calcsize(bcrecfmt)
> +
> class repofilecache(filecache):
> """All filecache usage on repo are done for logic that should be unfiltered
> """
> @@ -179,6 +187,7 @@ class localrepository(object):
> openerreqs = set(('revlogv1', 'generaldelta'))
> requirements = ['revlogv1']
> filtername = None
> + _branchcachedirty = None
>
> # a list of (ui, featureset) functions.
> # only functions defined in module of enabled extensions are invoked
> @@ -298,7 +307,7 @@ class localrepository(object):
Oh, and I'm not enthusiastic about stuffing new behavior into
localrepo. I'm open to suggestions on how we can better factor this.
> self.filteredrevcache = {}
>
> def close(self):
> - pass
> + self._branchcachesave()
>
> def _restrictcapabilities(self, caps):
> # bundle2 is not ready for prime time, drop it unless explicitly
> @@ -723,6 +732,74 @@ class localrepository(object):
> repo = (remote and remote.local()) and remote or self
> return repo[key].branch()
>
> + def _branchcacheload(self):
> + """Load cached branch values."""
> + try:
> + data = self.vfs.open(bcfilename).read()
> + except IOError:
> + data = ''
> +
> + self._branches = []
> + self._branchrecs = array.array('c') # bytes of struct type bcrecfmt
> + self.__dict__['_branchcachedirty'] = True
> + reporecslen = len(self) * bcrecsize
> + if len(data) >= bcheadsize:
> + v, recsstart, recslen = struct.unpack_from(bcheadfmt, data)
> + if v == bcversion and len(data) == recsstart + recslen:
> + if recsstart:
> + self._branches = \
> + data[bcheadsize:recsstart].split('\0')
> + self._branchrecs.fromstring(
> + buffer(data, recsstart, min(recslen, reporecslen)))
> + self.__dict__['_branchcachedirty'] = recslen > reporecslen
> + else:
> + self.ui.debug('branch cache file was invalid\n')
> +
> + if len(self._branchrecs) < reporecslen:
> + self._branchrecs.extend(
> + '\xff' * (reporecslen - len(self._branchrecs)))
> +
> + self._branchnamesindex = dict((b, r)
> + for r, b in enumerate(self._branches))
> +
> + def branch(self, rev):
> + """return branch name of rev, using and updating persistent cache."""
> + if self._branchcachedirty is None:
> + self._branchcacheload()
> +
> + node = self.changelog.node(rev)
> + cachenode, branchidx = struct.unpack_from(bcrecfmt, self._branchrecs,
> + rev * bcrecsize)
> + if cachenode == node and branchidx < len(self._branches):
> + return self._branches[branchidx]
> + b, _close = self.changelog.branchinfo(rev)
> + if b in self._branchnamesindex:
> + branchidx = self._branchnamesindex[b]
> + else:
> + branchidx = len(self._branches)
> + self._branches.append(b)
> + self._branchnamesindex[b] = branchidx
> + struct.pack_into(bcrecfmt, self._branchrecs, rev * bcrecsize,
> + node, branchidx)
> + self.__dict__['_branchcachedirty'] = True
> + return b
> +
> + def _branchcachesave(self):
> + """save branch cache if it is dirty"""
> + if self._branchcachedirty:
> + self.ui.debug('writing branch cache file\n')
> + try:
> + f = self.vfs.open(bcfilename, 'w', atomictemp=True)
> + s = '\0'.join(self._branches)
> + f.write(struct.pack(bcheadfmt, bcversion,
> + bcheadsize + len(s), len(self._branchrecs)))
> + f.write(s)
> + f.write(self._branchrecs)
> + f.close()
> + except IOError:
> + pass
> + self.__dict__['_branchcachedirty'] = False
> +
> def known(self, nodes):
> nm = self.changelog.nodemap
> pc = self._phasecache
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list