[PATCH] Add script to rewrite manifest to workaround lack of parent deltas
Greg Ward
greg-hg at gerg.ca
Wed Aug 19 21:36:37 UTC 2009
# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1233047576 0
# Node ID d7ff31c478891a8ef2273a0d0997c99a2b05092b
# Parent c5173a05aec8ee8e74a5bfc101e0ed7ed9f24625
Add script to rewrite manifest to work around lack of parent deltas.
Based on a patch to rewrite-log by Benoit Boissinot that I found here:
http://article.gmane.org/gmane.comp.version-control.mercurial.general/11908
Probably not ready to push yet; I'm just sending this now for initial
feedback: is this worth having, is the name appropriate, did I get
transactions/locking right, etc.?
diff --git a/contrib/shrink-manifest.py b/contrib/shrink-manifest.py
new file mode 100755
--- /dev/null
+++ b/contrib/shrink-manifest.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+"""\
+Reorder the manifest file in the current repository to save space.
+Specifically, this topologically sorts the revisions in the manifest so that
+revisions on the same branch are adjacent as much as possible. This is a
+workaround for the fact that Mercurial computes deltas relative to the previous
+revision rather than relative to a parent revision.
+"""
+
+# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org> as a
+# patch to rewrite-log. Cleaned up, refactored, documented, and renamed by Greg
+# Ward <greg at gerg.ca>.
+
+# XXX would be nice to have a way to verify the repository after shrinking,
+# e.g. by comparing "before" and "after" states of random changesets (maybe:
+# export before, shrink, export after, diff).
+
+import sys, os, tempfile
+from mercurial import ui as ui_, hg, revlog, transaction, node, util
+
+def good_sort(rl):
+    """Return the revision numbers of rl in a branch-grouped topological order.
+
+    rl must support len(rl) and rl.parentrevs(rev); parentrevs() returns the
+    parent revision numbers of rev, with -1 standing for "no parent".
+
+    Every revision appears exactly once in the result and never before any
+    of its parents. The children of a revision are placed at the front of
+    the work list, so one line of descent is followed as far as possible
+    before the walk moves on to a sibling branch.
+
+    Progress is written to sys.stdout.
+    """
+    write = sys.stdout.write
+
+    # build children and roots
+    children = {}
+    roots = []
+    nrevs = len(rl)
+    write('reading %d revs ' % nrevs)
+    for rev in range(nrevs):
+        children[rev] = []
+        # Collapse duplicate parents (p1 == p2 can happen in the manifest in
+        # certain circumstances) so that a child is registered only once per
+        # distinct parent and therefore is scheduled only once below.
+        parents = []
+        for p in rl.parentrevs(rev):
+            if p != -1 and p not in parents:
+                parents.append(p)
+        if not parents:
+            roots.append(rev)
+        for p in parents:
+            # a parent must already have been seen (lower revision number)
+            assert p in children
+            children[p].append(rev)
+
+        if rev % 1000 == 0:
+            write('.')
+    write('\n')
+
+    write('sorting ...')
+    visit = roots
+    ret = []
+    while visit:
+        rev = visit.pop(0)
+        if rev not in children:
+            # Already emitted. Skip it instead of aborting the whole sort,
+            # which would silently drop every revision still waiting.
+            continue
+        ret.append(rev)
+        ready = []
+        for c in children.pop(rev):
+            # c may be emitted once none of its parents is still pending
+            pending = [p for p in rl.parentrevs(c)
+                       if p != -1 and p in children]
+            if not pending:
+                ready.append(c)
+        # children first: stay on the current branch as long as possible
+        visit = ready + visit
+    write('\n')
+    return ret
+
+def write_revs(r1, r2, order, tr):
+    """Copy the revisions of r1 into r2 in the sequence given by order.
+
+    For each revision number in order, the node, parents, linkrev and full
+    text are read from r1 and appended to r2 with addrevision() under the
+    transaction tr. A progress dot goes to sys.stdout for the first revision
+    and for every 1000th one after it.
+    """
+    out = sys.stdout.write
+    out('writing %d revs ' % len(order))
+    for done, rev in enumerate(order):
+        nodeid = r1.node(rev)
+        parent1, parent2 = r1.parents(nodeid)
+        link = r1.linkrev(rev)
+        text = r1.revision(nodeid)
+        r2.addrevision(text, tr, link, parent1, parent2)
+
+        if done % 1000 == 0:
+            out('.')
+    out('\n')
+
+def report_shrinkage(olddatafn, newdatafn):
+    """Print the sizes of the old and new data files and the space saved.
+
+    olddatafn and newdatafn are paths; their sizes are taken from os.stat().
+    The saving is reported both as a percentage of the old size and as the
+    factor old/new. If either file is empty those figures are undefined, so
+    a placeholder line is printed instead of dividing by zero.
+
+    Raises OSError if either file cannot be stat()ed.
+    """
+    write = sys.stdout.write
+    oldsize = float(os.stat(olddatafn).st_size)
+    newsize = float(os.stat(newdatafn).st_size)
+    write('old file size: %12d bytes (%6.1f MiB)\n'
+          % (oldsize, oldsize/1024/1024))
+    write('new file size: %12d bytes (%6.1f MiB)\n'
+          % (newsize, newsize/1024/1024))
+
+    if oldsize == 0 or newsize == 0:
+        # percentage needs oldsize > 0, factor needs newsize > 0
+        write('shrinkage: n/a (empty data file)\n')
+        return
+
+    shrink_percent = (oldsize - newsize) / oldsize * 100
+    shrink_factor = oldsize / newsize
+    write('shrinkage: %.1f%% (%.1fx)\n'
+          % (shrink_percent, shrink_factor))
+
+def main():
+    """Rewrite the manifest revlog of the repository opened by hg.repository().
+
+    The revisions of store/00manifest.i are read, ordered with good_sort()
+    and written to a temporary revlog created next to the original. On
+    success the original index and data files are renamed to *.old and the
+    temporary files take their place. If anything fails while sorting,
+    writing or reporting, the transaction is aborted, the temporary files
+    are removed and the exception is re-raised.
+
+    Exits with an error message, before doing any work, when the manifest
+    has no separate .d data file.
+    """
+
+    # unbuffer stdout for nice progress output
+    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
+
+    # Open the local repository
+    ui = ui_.ui()
+    repo = hg.repository(ui)
+
+    indexfn = repo.join('store/00manifest.i')
+    datafn = indexfn[:-2] + '.d'
+    if not os.path.exists(datafn):
+        # Without a .d file the manifest is not big enough to be worth
+        # shrinking, and the size report and rename below would fail on the
+        # missing file only after the whole manifest had been rewritten.
+        sys.exit('abort: %s not found; manifest is too small to be worth '
+                 'shrinking' % datafn)
+
+    # mkstemp only reserves a unique name in the store; the revlog code
+    # reopens the file itself, so the descriptor is closed right away
+    (tmpfd, tmpindexfn) = tempfile.mkstemp(
+        dir=repo.join('store'), prefix='00manifest.', suffix='.i')
+    tmpdatafn = tmpindexfn[:-2] + '.d'
+    os.close(tmpfd)
+
+    r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
+    r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
+
+    # XXX shouldn't the journal be in .hg/store?
+    # XXX shouldn't we lock the store?
+    tr = transaction.transaction(sys.stderr.write, open, "journal")
+
+    try:
+        order = good_sort(r1)
+        write_revs(r1, r2, order, tr)
+        report_shrinkage(datafn, tmpdatafn)
+        tr.close()
+    except:
+        # Bare except on purpose: clean up after any failure (including an
+        # interrupt) and then re-raise it unchanged.
+        # abort transaction first, so we truncate the files before deleting
+        # them
+        tr.abort()
+        if os.path.exists(tmpindexfn):
+            os.unlink(tmpindexfn)
+        if os.path.exists(tmpdatafn):
+            os.unlink(tmpdatafn)
+        raise
+
+    # keep the original files around as *.old, then move the new ones in
+    os.rename(indexfn, indexfn + '.old')
+    os.rename(datafn, datafn + '.old')
+    os.rename(tmpindexfn, indexfn)
+    os.rename(tmpdatafn, datafn)
+
+if __name__ == '__main__':
+    main()
More information about the Mercurial-devel
mailing list