[Updated] D10646: revlog: add a "data compression mode" entry in the index tuple

marmoute (Pierre-Yves David) phabricator at mercurial-scm.org
Mon May 17 21:56:49 UTC 2021


Closed by commit rHG130c9f7ed914: revlog: add a "data compression mode" entry in the index tuple (authored by marmoute).
This revision was automatically updated to reflect the committed changes.

CHANGED PRIOR TO COMMIT
  https://phab.mercurial-scm.org/D10646?vs=27885&id=28039#toc

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D10646?vs=27885&id=28039

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D10646/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D10646

AFFECTED FILES
  mercurial/bundlerepo.py
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/policy.py
  mercurial/pure/parsers.py
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  mercurial/revlogutils/revlogv0.py
  mercurial/unionrepo.py
  tests/test-parseindex2.py

CHANGE DETAILS

diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -21,6 +21,9 @@
     policy,
     pycompat,
 )
+from mercurial.revlogutils import (
+    constants,
+)
 
 parsers = policy.importmod('parsers')
 
@@ -49,7 +52,7 @@
         cache = (0, data)
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0)
+            e = e + (0, 0, constants.COMP_MODE_INLINE)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -59,7 +62,7 @@
     else:
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0)
+            e = e + (0, 0, constants.COMP_MODE_INLINE)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -242,7 +245,19 @@
                 break
 
     def testminusone(self):
-        want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+        want = (
+            0,
+            0,
+            0,
+            -1,
+            -1,
+            -1,
+            -1,
+            sha1nodeconstants.nullid,
+            0,
+            0,
+            constants.COMP_MODE_INLINE,
+        )
         index, junk = parsers.parse_index2(data_inlined, True)
         got = index[-1]
         self.assertEqual(want, got)  # inline data
@@ -264,7 +279,20 @@
             # node won't matter for this test, let's just make sure
             # they don't collide. Other data don't matter either.
             node = hexrev(p1) + hexrev(p2) + b'.' * 12
-            index.append((0, 0, 12, 1, 34, p1, p2, node, 0, 0))
+            e = (
+                0,
+                0,
+                12,
+                1,
+                34,
+                p1,
+                p2,
+                node,
+                0,
+                0,
+                constants.COMP_MODE_INLINE,
+            )
+            index.append(e)
 
         appendrev(4)
         appendrev(5)
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -31,6 +31,10 @@
     vfs as vfsmod,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 
 class unionrevlog(revlog.revlog):
     def __init__(self, opener, radix, revlog2, linkmapper):
@@ -65,6 +69,7 @@
                 node,
                 _sdo,
                 _sds,
+                _dcm,
             ) = rev
             flags = _start & 0xFFFF
 
@@ -99,6 +104,7 @@
                 node,
                 0,  # sidedata offset
                 0,  # sidedata size
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)
diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py
--- a/mercurial/revlogutils/revlogv0.py
+++ b/mercurial/revlogutils/revlogv0.py
@@ -9,6 +9,7 @@
 
 from ..node import sha1nodeconstants
 from .constants import (
+    COMP_MODE_INLINE,
     INDEX_ENTRY_V0,
 )
 from ..i18n import _
@@ -42,7 +43,19 @@
 
 class revlogoldindex(list):
     entry_size = INDEX_ENTRY_V0.size
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+    null_item = (
+        0,
+        0,
+        0,
+        -1,
+        -1,
+        -1,
+        -1,
+        sha1nodeconstants.nullid,
+        0,
+        0,
+        COMP_MODE_INLINE,
+    )
 
     @property
     def nodemap(self):
@@ -138,6 +151,7 @@
             e[6],
             0,  # no side data support
             0,  # no side data support
+            COMP_MODE_INLINE,
         )
         index.append(e2)
         nodemap[e[6]] = n
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -1,4 +1,4 @@
-# revlogdeltas.py - constant used for revlog logic
+# constants.py - constants used for revlog logic.
 #
 # Copyright 2005-2007 Olivia Mackall <olivia at selenic.com>
 # Copyright 2018 Octobus <contact at octobus.net>
@@ -114,6 +114,14 @@
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
 
+## chunk compression mode constants:
+# These constants are used in revlog version >=2 to denote the compression used
+# for a chunk.
+
+# Chunks use a compression mode stored "inline" at the start of the chunk
+# itself.  This is the mode always used for revlog version "0" and "1".
+COMP_MODE_INLINE = 2
+
 SUPPORTED_FLAGS = {
     REVLOGV0: REVLOGV0_FLAGS,
     REVLOGV1: REVLOGV1_FLAGS,
@@ -152,4 +160,5 @@
     },
 }
 
+
 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -35,6 +35,7 @@
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
+    COMP_MODE_INLINE,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
@@ -336,6 +337,12 @@
 
     [9] sidedata chunk length:
             The size, in bytes, of the revision's side-data chunk.
+
+    [10] data compression mode:
+            two bits that detail the way the data chunk is compressed on disk.
+            (see "COMP_MODE_*" constants for details). For revlog version 0 and
+            1 this will always be COMP_MODE_INLINE.
+
     """
 
     _flagserrorclass = error.RevlogError
@@ -2474,6 +2481,7 @@
             node,
             sidedata_offset,
             len(serialized_sidedata),
+            COMP_MODE_INLINE,
         )
 
         self.index.append(e)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -54,7 +54,19 @@
     # Size of a C long int, platform independent
     int_size = struct.calcsize(b'>i')
     # An empty index entry, used as a default value to be overridden, or nullrev
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+    null_item = (
+        0,
+        0,
+        0,
+        -1,
+        -1,
+        -1,
+        -1,
+        sha1nodeconstants.nullid,
+        0,
+        0,
+        revlog_constants.COMP_MODE_INLINE,
+    )
 
     @util.propertycache
     def entry_size(self):
@@ -135,7 +147,7 @@
 
     def _unpack_entry(self, data):
         r = self.index_format.unpack(data)
-        r = r + (0, 0)
+        r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
         return r
 
     def pack_header(self, header):
@@ -303,16 +315,17 @@
             self._extra[rev - self._lgt] = new
 
     def _unpack_entry(self, data):
-        return self.index_format.unpack(data)
+        return self.index_format.unpack(data) + (
+            revlog_constants.COMP_MODE_INLINE,
+        )
 
     def _pack_entry(self, entry):
-        return self.index_format.pack(*entry)
+        return self.index_format.pack(*entry[:10])
 
     def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
-        p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
-        return p
+        return self._pack_entry(entry)
 
     def pack_header(self, header):
         """pack header information as binary"""
diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -80,7 +80,7 @@
     ('cext', 'bdiff'): 3,
     ('cext', 'mpatch'): 1,
     ('cext', 'osutil'): 4,
-    ('cext', 'parsers'): 18,
+    ('cext', 'parsers'): 19,
 }
 
 # map import request to other package or module
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -118,9 +118,9 @@
 static int index_find_node(indexObject *self, const char *node);
 
 #if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
+static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
 #else
-static const char *const tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
+static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
 #endif
 
 /* A RevlogNG v1 index entry is 64 bytes long. */
@@ -132,6 +132,8 @@
 static const long format_v1 = 1; /* Internal only, could be any number */
 static const long format_v2 = 2; /* Internal only, could be any number */
 
+static const char comp_mode_inline = 2;
+
 static void raise_revlog_error(void)
 {
 	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
@@ -294,6 +296,7 @@
 	uint64_t offset_flags, sidedata_offset;
 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
 	    sidedata_comp_len;
+	char data_comp_mode;
 	const char *c_node_id;
 	const char *data;
 	Py_ssize_t length = index_length(self);
@@ -340,9 +343,11 @@
 		sidedata_comp_len = getbe32(data + 72);
 	}
 
+	data_comp_mode = comp_mode_inline;
 	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
 	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
-	                     self->nodelen, sidedata_offset, sidedata_comp_len);
+	                     self->nodelen, sidedata_offset, sidedata_comp_len,
+	                     data_comp_mode);
 }
 /*
  * Pack header information in binary
@@ -443,6 +448,7 @@
 {
 	uint64_t offset_flags, sidedata_offset;
 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
+	char data_comp_mode;
 	Py_ssize_t c_node_id_len, sidedata_comp_len;
 	const char *c_node_id;
 	char *data;
@@ -450,8 +456,9 @@
 	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
 	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
 	                      &parent_2, &c_node_id, &c_node_id_len,
-	                      &sidedata_offset, &sidedata_comp_len)) {
-		PyErr_SetString(PyExc_TypeError, "10-tuple required");
+	                      &sidedata_offset, &sidedata_comp_len,
+	                      &data_comp_mode)) {
+		PyErr_SetString(PyExc_TypeError, "11-tuple required");
 		return NULL;
 	}
 
@@ -459,6 +466,12 @@
 		PyErr_SetString(PyExc_TypeError, "invalid node");
 		return NULL;
 	}
+	if (data_comp_mode != comp_mode_inline) {
+		PyErr_Format(PyExc_ValueError,
+		             "invalid data compression mode: %i",
+		             data_comp_mode);
+		return NULL;
+	}
 
 	if (self->new_length == self->added_length) {
 		size_t new_added_length =
@@ -2761,9 +2774,9 @@
 		self->entry_size = v1_entry_size;
 	}
 
-	self->nullentry =
-	    Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1,
-	                  -1, -1, nullid, self->nodelen, 0, 0);
+	self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
+	                                0, 0, -1, -1, -1, -1, nullid,
+	                                self->nodelen, 0, 0, comp_mode_inline);
 
 	if (!self->nullentry)
 		return -1;
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -668,7 +668,7 @@
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 18;
+static const int version = 19;
 
 static void module_init(PyObject *mod)
 {
diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -105,6 +105,7 @@
                 node,
                 0,
                 0,
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)



To: marmoute, indygreg, #hg-reviewers, Alphare
Cc: joerg.sonnenberger, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210517/fc7aa2d6/attachment-0002.html>


More information about the Mercurial-patches mailing list