[PATCH RFC] revlog: add support for lz4 compression
Bryan O'Sullivan
bos at serpentine.com
Wed Jun 6 20:05:27 UTC 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1339007455 25200
# Node ID f8653a28ba3705e467b9c697328e4231c8dc838e
# Parent d566aa319d5f7c58c69b985b53ff7498f08e53c6
revlog: add support for lz4 compression
diff --git a/mercurial/help/config.txt b/mercurial/help/config.txt
--- a/mercurial/help/config.txt
+++ b/mercurial/help/config.txt
@@ -517,6 +517,15 @@ Example for ``~/.hgrc``::
option ensures that the on-disk format of newly created
repositories will be compatible with Mercurial before version 1.1.
+``uselz4``
+ Enable or disable the "lz4" repository format, which uses lz4
+ instead of zlib when compressing deltas. This keeps write
+ performance and space usage similar to those of zlib, but offers
+ significantly better read performance. Disabled by
+ default. Disabling this option ensures that the on-disk format of
+ newly created repositories will be compatible with Mercurial
+ before version 2.3.
+
``dotencode``
Enable or disable the "dotencode" repository format which enhances
the "fncache" repository format (which has to be enabled to use
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -10,7 +10,7 @@ from i18n import _
import repo, changegroup, subrepo, discovery, pushkey
import changelog, dirstate, filelog, manifest, context, bookmarks, phases
import lock, transaction, store, encoding
-import scmutil, util, extensions, hook, error, revset
+import scmutil, util, extensions, hook, error, revset, revlog
import match as matchmod
import merge as mergemod
import tags as tagsmod
@@ -27,7 +27,7 @@ class storecache(filecache):
class localrepository(repo.repository):
capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey',
'known', 'getbundle'))
- supportedformats = set(('revlogv1', 'generaldelta'))
+ supportedformats = set(('revlogv1', 'generaldelta', 'lz4'))
supported = supportedformats | set(('store', 'fncache', 'shared',
'dotencode'))
@@ -58,6 +58,9 @@ class localrepository(repo.repository):
util.makedirs(path)
util.makedir(self.path, notindexed=True)
requirements = ["revlogv1"]
+ havelz4 = not revlog.lz4missing
+ if havelz4 and self.ui.configbool('format', 'uselz4'):
+ requirements.append('lz4')
if self.ui.configbool('format', 'usestore', True):
os.mkdir(os.path.join(self.path, "store"))
requirements.append("store")
@@ -121,7 +124,7 @@ class localrepository(repo.repository):
def _applyrequirements(self, requirements):
self.requirements = requirements
- openerreqs = set(('revlogv1', 'generaldelta'))
+ openerreqs = set(('revlogv1', 'generaldelta', 'lz4'))
self.sopener.options = dict((r, 1) for r in requirements
if r in openerreqs)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -15,23 +15,33 @@ and O(changes) merge between branches.
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, dagutil
-import struct, zlib, errno
+import struct, zlib, lz4, errno
_pack = struct.pack
_unpack = struct.unpack
-_compress = zlib.compress
-_decompress = zlib.decompress
+_zlibcompress = zlib.compress
+_zlibdecompress = zlib.decompress
_sha = util.sha1
+try:
+ _lz4compress = lz4.compressHC
+ _lz4decompress = lz4.decompress
+ lz4missing = False
+except ImportError:
+ def lz4missing(eek):
+ raise error.RevlogError(_('this repo requires lz4 support'))
+ _lz4compress = _lz4decompress = lz4missing
+
# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
+REVLOGLZ4 = (1 << 18)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
-REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
+REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA | REVLOGLZ4
# revlog index flags
REVIDX_KNOWN_FLAGS = 0
@@ -75,7 +85,7 @@ def hash(text, p1, p2):
s.update(text)
return s.digest()
-def compress(text):
+def compress(text, uselz4):
""" generate a possibly-compressed representation of text """
if not text:
return ("", text)
@@ -83,6 +93,8 @@ def compress(text):
bin = None
if l < 44:
pass
+ elif uselz4:
+ bin = '4' + _lz4compress(text)
elif l > 1000000:
# zlib makes an internal copy, thus doubling memory usage for
# large files, so lets do this in pieces
@@ -97,7 +109,7 @@ def compress(text):
if sum(map(len, p)) < l:
bin = "".join(p)
else:
- bin = _compress(text)
+ bin = _zlibcompress(text)
if bin is None or len(bin) > l:
if text[0] == '\0':
return ("", text)
@@ -112,7 +124,9 @@ def decompress(bin):
if t == '\0':
return bin
if t == 'x':
- return _decompress(bin)
+ return _zlibdecompress(bin)
+ if t == '4':
+ return _lz4decompress(bin[1:])
if t == 'u':
return bin[1:]
raise RevlogError(_("unknown compression type %r") % t)
@@ -231,6 +245,8 @@ class revlog(object):
if 'revlogv1' in opts:
if 'generaldelta' in opts:
v |= REVLOGGENERALDELTA
+ if 'lz4' in opts:
+ v |= REVLOGLZ4
else:
v = 0
@@ -250,6 +266,7 @@ class revlog(object):
self.version = v
self._inline = v & REVLOGNGINLINEDATA
self._generaldelta = v & REVLOGGENERALDELTA
+ self._lz4 = v & REVLOGLZ4
flags = v & ~0xFFFF
fmt = v & 0xFFFF
if fmt == REVLOGV0 and flags:
@@ -1052,7 +1069,7 @@ class revlog(object):
t = buildtext()
ptext = self.revision(self.node(rev))
delta = mdiff.textdiff(ptext, t)
- data = compress(delta)
+ data = compress(delta, self._lz4)
l = len(data[1]) + len(data[0])
if basecache[0] == rev:
chainbase = basecache[1]
@@ -1096,7 +1113,7 @@ class revlog(object):
textlen = len(text)
if d is None or dist > textlen * 2:
text = buildtext()
- data = compress(text)
+ data = compress(text, self._lz4)
l = len(data[1]) + len(data[0])
base = chainbase = curr
More information about the Mercurial-devel
mailing list