[PATCH 4 of 4] hgext: add an lz4revlog extension
Bryan O'Sullivan
bos at serpentine.com
Mon Jun 25 21:58:34 UTC 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1340661485 25200
# Node ID dd1fd4ec9070bd601bb5201de4b8fdace0dbc903
# Parent e6054ad06d599fe5dca3172e9f9cd577208ff1b9
hgext: add an lz4revlog extension
This trades space (a 25-30% increase) for decompression performance (35
times faster). Since revlog decompression is a major bottleneck,
this improves the performance of many operations.
A couple of examples from a kernel tree: update time improves by
about 35%, as does annotate time.
lz4 makes a bigger difference for repos with larger quantities of
compressed data. Reconstructing a large manifest improves from 0.8
seconds with zlib to 0.1 with lz4, for instance.
diff --git a/hgext/lz4revlog.py b/hgext/lz4revlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/lz4revlog.py
@@ -0,0 +1,81 @@
+# lz4revlog.py - lz4 delta compression for mercurial
+#
+# Copyright 2012 Facebook
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''store revlog deltas using lz4 compression
+
+This extension uses the lz4 compression algorithm to store deltas,
+rather than Mercurial's default of zlib compression. lz4 offers much
+faster decompression than zlib, at a cost of about 30% more disk
+space. The improvement in decompression speed leads to speedups in
+many common operations, such as update and history traversal.
+
+To use lz4 compression, a repository can be created from scratch or
+converted from an existing repository, for example using :hg:`clone
+--pull`.
+
+The behaviour of Mercurial in an existing zlib-compressed repository
+will not be affected by this extension.
+
+To avoid use of lz4 when cloning or creating a new repository, use
+:hg:`--config format.uselz4=no`.
+
+Interop with other Mercurial repositories is generally not affected by
+this extension.
+'''
+
+from mercurial import error, extensions, localrepo, revlog, util
+from mercurial.i18n import _
+import lz4
+
+try:
+ _compress = lz4.compressHC
+ _decompress = lz4.decompress
+ # don't crash horribly if invoked on an incompatible hg
+ usable = localrepo.localrepository.openerreqs
+except (AttributeError, ImportError):
+ def lz4missing(eek):
+ raise util.Abort(_('the lz4revlog extension requires lz4 support'))
+ _compress = _decompress = lz4missing
+ usable = False
+
+def decompress(orig, bin):
+ if not bin:
+ return bin
+ t = bin[0]
+ if t == '4':
+ return _decompress(bin[1:])
+ return orig(bin)
+
+if usable:
+ @extensions.replaceclass(localrepo, 'localrepository')
+ class lz4repo(localrepo.localrepository):
+ def _baserequirements(self, create):
+ reqs = super(lz4repo, self)._baserequirements(create)
+ if create and self.ui.configbool('format', 'uselz4', True):
+ reqs.append('lz4revlog')
+ return reqs
+
+ @extensions.replaceclass(revlog, 'revlog')
+ class lz4revlog(revlog.revlog):
+ def __init__(self, opener, indexfile):
+ super(lz4revlog, self).__init__(opener, indexfile)
+ opts = getattr(opener, 'options', None)
+ self._lz4 = opts and 'lz4revlog' in opts
+
+ def compress(self, text):
+ if self._lz4:
+ l = len(text)
+ c = _compress(text)
+ if len(text) <= len(c):
+ return ('u', text)
+ return ('', '4' + c)
+ return super(lz4revlog, self).compress(text)
+
+ extensions.wrapfunction(revlog, 'decompress', decompress)
+ cls = localrepo.localrepository
+ for reqs in 'supportedformats supported openerreqs'.split():
+ getattr(cls, reqs).add('lz4revlog')
diff --git a/tests/hghave.py b/tests/hghave.py
--- a/tests/hghave.py
+++ b/tests/hghave.py
@@ -267,6 +267,13 @@ def has_tic():
def has_msys():
return os.getenv('MSYSTEM')
+def has_lz4():
+ try:
+ import lz4
+ return lz4.compressHC
+ except (ImportError, AttributeError):
+ return False
+
checks = {
"true": (lambda: True, "yak shaving"),
"false": (lambda: False, "nail clipper"),
@@ -287,6 +294,7 @@ checks = {
"icasefs": (has_icasefs, "case insensitive file system"),
"inotify": (has_inotify, "inotify extension support"),
"lsprof": (has_lsprof, "python lsprof module"),
+ "lz4": (has_lz4, "python lz4 module"),
"mtn": (has_mtn, "monotone client (>= 1.0)"),
"outer-repo": (has_outer_repo, "outer repo"),
"p4": (has_p4, "Perforce server and client"),
diff --git a/tests/test-lz4revlog.t b/tests/test-lz4revlog.t
new file mode 100644
--- /dev/null
+++ b/tests/test-lz4revlog.t
@@ -0,0 +1,125 @@
+ $ "$TESTDIR/hghave" lz4 || exit 80
+
+ $ hg init a
+ $ cd a
+ $ echo a>a
+ $ hg ci -q -A -m 0
+
+ $ echo "[extensions]" >> $HGRCPATH
+ $ echo "lz4revlog=" >> $HGRCPATH
+
+having lz4revlog enabled should not affect an existing repo
+
+ $ for i in 0 1 2 3 4 5 6 7 8 9; do
+ > echo qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqquuuuuuuuuuuuuuuuuuuuqqqq$i >> a
+ > done
+ $ hg ci -q -m 1
+ $ hg verify -q
+
+ $ cd ..
+
+regular clone of an existing zlib repo should still use zlib
+
+ $ hg clone a b
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ sort b/.hg/requires
+ dotencode
+ fncache
+ revlogv1
+ store
+
+pulled clone of a zlib repo should use lz4
+
+ $ hg clone -q --pull a alz4
+ $ sort alz4/.hg/requires
+ dotencode
+ fncache
+ lz4revlog
+ revlogv1
+ store
+
+disable lz4, then clone
+
+ $ hg --config format.uselz4=False clone --pull a w
+ requesting all changes
+ adding changesets
+ adding manifests
+ adding file changes
+ added 2 changesets with 2 changes to 1 files
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ sort w/.hg/requires
+ dotencode
+ fncache
+ revlogv1
+ store
+
+attempt to disable lz4 should be ignored for hardlinked clone
+
+ $ hg --config format.uselz4=False clone alz4 azlib
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ sort azlib/.hg/requires
+ dotencode
+ fncache
+ lz4revlog
+ revlogv1
+ store
+
+a new repo should use lz4 by default
+
+ $ hg init lz
+ $ cd lz
+ $ echo a>a
+ $ hg ci -q -A -m lz0
+ $ for i in 0 1 2 3 4 5 6 7 8 9; do
+ > echo qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqquuuuuuuuuuuuuuuuuuuu$i >> a
+ > done
+ $ hg ci -q -m lz1
+ $ hg verify -q
+ $ hg tip
+ changeset: 1:186d32280905
+ tag: tip
+ user: test
+ date: Thu Jan 01 00:00:00 1970 +0000
+ summary: lz1
+
+ $ sort .hg/requires
+ dotencode
+ fncache
+ lz4revlog
+ revlogv1
+ store
+
+vanilla hg should bail in an lz4 repo
+
+ $ hg --config 'extensions.lz4revlog=!' tip
+ abort: unknown repository format: requires features 'lz4revlog' (upgrade Mercurial)!
+ [255]
+
+start a server
+
+ $ "$TESTDIR/hghave" serve || exit 80
+
+ $ hg --config server.uncompressed=True serve -p $HGPORT -d --pid-file=../hg1.pid -E ../error.log
+ $ cat ../hg1.pid >> $DAEMON_PIDS
+
+uncompressed clone from lz4 to lz4 should be fine
+
+ $ cd ..
+ $ hg clone --uncompressed http://localhost:$HGPORT/ happy
+ streaming all changes
+ 3 files to transfer, 665 bytes of data
+ transferred 665 bytes in * seconds (*/sec) (glob)
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+uncompressed clone from lz4 to non-lz4 should fall back to pull
+
+ $ hg --config 'extensions.lz4revlog=!' clone -U --uncompressed http://localhost:$HGPORT/ nonesuch
+ requesting all changes
+ adding changesets
+ adding manifests
+ adding file changes
+ added 2 changesets with 2 changes to 1 files
More information about the Mercurial-devel
mailing list