rfc: efficiently read part of the manifest
Daniel Holth
dholth at fastmail.fm
Thu Apr 26 04:44:51 UTC 2007
fulltext 0.0326070785522
Small manifest: 0.01393699646
Big manifest: 0.0789821147919
Small has 770
Big has 10613
b. Iterate with startswith("desired-path/") through the big manifest to
get the same result
fulltext 0.0342130661011
Small manifest: 0.0146701335907
Big manifest: 0.0930209159851
Small has 770
Big has 10613
This code is a stab at parsing and returning only part of the manifest.
Often, e.g. in TracMercurial, we are browsing a particular directory. We
do not want to iterate through the entire manifest with
filename.startswith(current_directory); instead, we would prefer to get
the contents of the current directory in O(log n) time.
It saves a little bit of time?
On the IRC channel it was suggested that this partial manifest feature
might be useful in lots of places, but I'm not sure what those places are=
=2E
-- Daniel Holth
--------------020000050000000909070501
Content-Type: text/x-patch;
name="partial-manifest.patch"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline;
filename="partial-manifest.patch"
# HG changeset patch
# User Daniel Holth <dholth at fastmail.fm>
# Date 1177561013 14400
# Node ID dadb2c3e81c77164fe814c531e3df68a9a52f8cf
# Parent 10edaed7f909c9b7ba1c5844e93ebf217746c8c5
efficient partial manifest functions
diff -r 10edaed7f909 -r dadb2c3e81c7 manifestmonkey
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/manifestmonkey Thu Apr 26 00:16:53 2007 -0400
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# Parse manifests
+
+import pprint
+import time
+
+import mercurial.localrepo
+lr =3D mercurial.localrepo.localrepository(None)
+
+node =3D lr.manifest.lookup(lr.changelog.count() - 3)
+
+begin =3D time.time()
+fulltext =3D lr.manifest.revision(node)
+end =3D time.time()
+print "fulltext", end - begin
+
+# 58 out of 516 entries
+begin =3D time.time()
+text =3D lr.manifest.prefixes(node, "mercurial/")
+small_manifest =3D lr.manifest.read_text(text)
+end =3D time.time()
+print "Small manifest:", end - begin
+
+begin =3D time.time()
+big_manifest =3D lr.manifest.read(node)
+end =3D time.time()
+print "Big manifest:", end - begin
+
+print "Small has", len(small_manifest[1])
+# pprint.pprint(small_manifest)
+print "Big has", len(big_manifest)
+
+# pprint.pprint(small_manifest)
diff -r 10edaed7f909 -r dadb2c3e81c7 mercurial/manifest.py
--- a/mercurial/manifest.py Mon Apr 16 12:37:30 2007 -0500
+++ b/mercurial/manifest.py Thu Apr 26 00:16:53 2007 -0400
@@ -62,6 +62,14 @@ class manifest(revlog):
mapping.rawset(f, n)
self.mapcache =3D (node, mapping)
return mapping
+
+ def read_text(self, text):
+ mapping =3D manifestdict()
+ other =3D []
+ for f, n in self.parselines(text):
+ mapping.rawset(f, n)
+ other.append((f, n))
+ return mapping, other
=20
def _search(self, m, s, lo=3D0, hi=3DNone):
'''return a tuple (start, end) that says where to find s within =
m.
@@ -116,6 +124,17 @@ class manifest(revlog):
f, n =3D l.split('\0')
return bin(n[:40]), n[40:-1]
=20
+ def prefixes(self, node, prefix):
+ '''look up everything in the manifest starting with a particular=
+ prefix efficiently.'''
+ text =3D self.revision(node)
+ try:
+ start =3D self._search(text, prefix)[0]
+ end =3D self._search(text, prefix[:-1] + chr(ord(prefix[-1])=
+1))[1]
+ except IndexError, e:
+ return text
+ return text[start:end]
+
def add(self, map, transaction, link, p1=3DNone, p2=3DNone,
changed=3DNone):
# apply the changes collected during the bisect loop to our addl=
ist
--------------020000050000000909070501--
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 252 bytes
Desc: OpenPGP digital signature
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-devel/attachments/20070426/20813dde/attachment.asc>
More information about the Mercurial-devel
mailing list