[PATCH RFC] revlog: add support for lz4 compression

Bryan O'Sullivan bos at serpentine.com
Wed Jun 6 20:05:27 UTC 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1339007455 25200
# Node ID f8653a28ba3705e467b9c697328e4231c8dc838e
# Parent  d566aa319d5f7c58c69b985b53ff7498f08e53c6
revlog: add support for lz4 compression

diff --git a/mercurial/help/config.txt b/mercurial/help/config.txt
--- a/mercurial/help/config.txt
+++ b/mercurial/help/config.txt
@@ -517,6 +517,15 @@ Example for ``~/.hgrc``::
     option ensures that the on-disk format of newly created
     repositories will be compatible with Mercurial before version 1.1.
 
+``uselz4``
+    Enable or disable the "lz4" repository format, which uses lz4
+    instead of zlib when compressing revision data. This maintains
+    write performance and space usage similar to zlib, but offers
+    significantly better read performance. Disabled by
+    default. Disabling this option ensures that the on-disk format of
+    newly created repositories will be compatible with Mercurial
+    before version 2.3.
+
 ``dotencode``
     Enable or disable the "dotencode" repository format which enhances
     the "fncache" repository format (which has to be enabled to use
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -10,7 +10,7 @@ from i18n import _
 import repo, changegroup, subrepo, discovery, pushkey
 import changelog, dirstate, filelog, manifest, context, bookmarks, phases
 import lock, transaction, store, encoding
-import scmutil, util, extensions, hook, error, revset
+import scmutil, util, extensions, hook, error, revset, revlog
 import match as matchmod
 import merge as mergemod
 import tags as tagsmod
@@ -27,7 +27,7 @@ class storecache(filecache):
 class localrepository(repo.repository):
     capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey',
                         'known', 'getbundle'))
-    supportedformats = set(('revlogv1', 'generaldelta'))
+    supportedformats = set(('revlogv1', 'generaldelta', 'lz4'))
     supported = supportedformats | set(('store', 'fncache', 'shared',
                                         'dotencode'))
 
@@ -58,6 +58,9 @@ class localrepository(repo.repository):
                     util.makedirs(path)
                 util.makedir(self.path, notindexed=True)
                 requirements = ["revlogv1"]
+                havelz4 = not revlog.lz4missing
+                if havelz4 and self.ui.configbool('format', 'uselz4'):
+                    requirements.append('lz4')
                 if self.ui.configbool('format', 'usestore', True):
                     os.mkdir(os.path.join(self.path, "store"))
                     requirements.append("store")
@@ -121,7 +124,7 @@ class localrepository(repo.repository):
 
     def _applyrequirements(self, requirements):
         self.requirements = requirements
-        openerreqs = set(('revlogv1', 'generaldelta'))
+        openerreqs = set(('revlogv1', 'generaldelta', 'lz4'))
         self.sopener.options = dict((r, 1) for r in requirements
                                            if r in openerreqs)
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -15,23 +15,33 @@ and O(changes) merge between branches.
 from node import bin, hex, nullid, nullrev
 from i18n import _
 import ancestor, mdiff, parsers, error, util, dagutil
-import struct, zlib, errno
+import struct, zlib, lz4, errno
 
 _pack = struct.pack
 _unpack = struct.unpack
-_compress = zlib.compress
-_decompress = zlib.decompress
+_zlibcompress = zlib.compress
+_zlibdecompress = zlib.decompress
 _sha = util.sha1
 
+try:
+    _lz4compress = lz4.compressHC
+    _lz4decompress = lz4.decompress
+    lz4missing = False
+except ImportError:
+    def lz4missing(eek):
+        raise error.RevlogError(_('this repo requires lz4 support'))
+    _lz4compress = _lz4decompress = lz4missing
+
 # revlog header flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOGGENERALDELTA = (1 << 17)
+REVLOGLZ4 = (1 << 18)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
-REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
+REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA | REVLOGLZ4
 
 # revlog index flags
 REVIDX_KNOWN_FLAGS = 0
@@ -75,7 +85,7 @@ def hash(text, p1, p2):
     s.update(text)
     return s.digest()
 
-def compress(text):
+def compress(text, uselz4):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
@@ -83,6 +93,8 @@ def compress(text):
     bin = None
     if l < 44:
         pass
+    elif uselz4:
+        bin = '4' + _lz4compress(text)
     elif l > 1000000:
         # zlib makes an internal copy, thus doubling memory usage for
         # large files, so lets do this in pieces
@@ -97,7 +109,7 @@ def compress(text):
         if sum(map(len, p)) < l:
             bin = "".join(p)
     else:
-        bin = _compress(text)
+        bin = _zlibcompress(text)
     if bin is None or len(bin) > l:
         if text[0] == '\0':
             return ("", text)
@@ -112,7 +124,9 @@ def decompress(bin):
     if t == '\0':
         return bin
     if t == 'x':
-        return _decompress(bin)
+        return _zlibdecompress(bin)
+    if t == '4':
+        return _lz4decompress(bin[1:])
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)
@@ -231,6 +245,8 @@ class revlog(object):
             if 'revlogv1' in opts:
                 if 'generaldelta' in opts:
                     v |= REVLOGGENERALDELTA
+                if 'lz4' in opts:
+                    v |= REVLOGLZ4
             else:
                 v = 0
 
@@ -250,6 +266,7 @@ class revlog(object):
         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         self._generaldelta = v & REVLOGGENERALDELTA
+        self._lz4 = v & REVLOGLZ4
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
@@ -1052,7 +1069,7 @@ class revlog(object):
                 t = buildtext()
                 ptext = self.revision(self.node(rev))
                 delta = mdiff.textdiff(ptext, t)
-            data = compress(delta)
+            data = compress(delta, self._lz4)
             l = len(data[1]) + len(data[0])
             if basecache[0] == rev:
                 chainbase = basecache[1]
@@ -1096,7 +1113,7 @@ class revlog(object):
             textlen = len(text)
         if d is None or dist > textlen * 2:
             text = buildtext()
-            data = compress(text)
+            data = compress(text, self._lz4)
             l = len(data[1]) + len(data[0])
             base = chainbase = curr
 



More information about the Mercurial-devel mailing list