[Request] [+-- ] D8679: manifest: tighter manifest parsing and flag use

joerg.sonnenberger (Joerg Sonnenberger) phabricator at mercurial-scm.org
Mon Jul 6 01:46:16 UTC 2020


joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  In the manifest line, flags are put directly after the hash, so the
  parser has been guessing the presence of flags based on the length of
  the hash. Replace this assumption with an enumeration of the valid flags,
  and strip them from the hash first, as they are distinct input values.
  Consistently handle the expected 256bit length of the SHA1-replacement
  in the pure Python parser as well. Check that setting flags will use one
  of the blessed values.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8679

AFFECTED FILES
  mercurial/cext/manifest.c
  mercurial/manifest.py

CHANGE DETAILS

diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -121,8 +121,20 @@
             self.pos += 1
             return data
         zeropos = data.find(b'\x00', pos)
-        hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
-        flags = self.lm._getflags(data, self.pos, zeropos)
+        nlpos = data.find(b'\n', pos)
+        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
+            raise error.StorageError(b'Invalid manifest line')
+        flags = data[nlpos - 1 : nlpos]
+        if flags in _manifestflags:
+            hlen = nlpos - zeropos - 2
+        else:
+            hlen = nlpos - zeropos - 1
+            flags = b''
+        if hlen not in (40, 64):
+            raise error.StorageError(b'Invalid manifest line')
+        hashval = unhexlify(
+            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
+        )
         self.pos += 1
         return (data[pos:zeropos], hashval, flags)
 
@@ -140,6 +152,9 @@
     return (a > b) - (a < b)
 
 
+_manifestflags = {b'', b'l', b't', b'x'}
+
+
 class _lazymanifest(object):
     """A pure python manifest backed by a byte string.  It is supplimented with
     internal lists as it is modified, until it is compacted back to a pure byte
@@ -251,15 +266,6 @@
     def __contains__(self, key):
         return self.bsearch(key) != -1
 
-    def _getflags(self, data, needle, pos):
-        start = pos + 41
-        end = data.find(b"\n", start)
-        if end == -1:
-            end = len(data) - 1
-        if start == end:
-            return b''
-        return self.data[start:end]
-
     def __getitem__(self, key):
         if not isinstance(key, bytes):
             raise TypeError(b"getitem: manifest keys must be a bytes.")
@@ -273,13 +279,17 @@
         nlpos = data.find(b'\n', zeropos)
         assert 0 <= needle <= len(self.positions)
         assert len(self.extrainfo) == len(self.positions)
+        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
+            raise error.StorageError(b'Invalid manifest line')
         hlen = nlpos - zeropos - 1
-        # Hashes sometimes have an extra byte tucked on the end, so
-        # detect that.
-        if hlen % 2:
+        flags = data[nlpos - 1 : nlpos]
+        if flags in _manifestflags:
             hlen -= 1
+        else:
+            flags = b''
+        if hlen not in (40, 64):
+            raise error.StorageError(b'Invalid manifest line')
         hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
-        flags = self._getflags(data, needle, zeropos)
         return (hashval, flags)
 
     def __delitem__(self, key):
@@ -609,6 +619,8 @@
         return self._lm.diff(m2._lm, clean)
 
     def setflag(self, key, flag):
+        if flag not in _manifestflags:
+            raise TypeError(b"Invalid manifest flag set.")
         self._lm[key] = self[key], flag
 
     def get(self, key, default=None):
@@ -1066,6 +1078,8 @@
 
     def setflag(self, f, flags):
         """Set the flags (symlink, executable) for path f."""
+        if flags not in _manifestflags:
+            raise TypeError(b"Invalid manifest flag set.")
         self._load()
         dir, subpath = _splittopdir(f)
         if dir:
diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c
--- a/mercurial/cext/manifest.c
+++ b/mercurial/cext/manifest.c
@@ -49,23 +49,37 @@
 }
 
 /* get the node value of a single line */
-static PyObject *nodeof(line *l)
+static PyObject *nodeof(line *l, char *flag)
 {
 	char *s = l->start;
 	Py_ssize_t llen = pathlen(l);
 	Py_ssize_t hlen = l->len - llen - 2;
-	Py_ssize_t hlen_raw = 20;
+	Py_ssize_t hlen_raw;
 	PyObject *hash;
 	if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
 		PyErr_SetString(PyExc_ValueError, "manifest line too short");
 		return NULL;
 	}
+	/* Detect flags after the hash first. */
+	switch (s[llen + hlen]) {
+	case 'l':
+	case 't':
+	case 'x':
+		if (flag)
+			*flag = s[llen + hlen];
+		--hlen;
+		break;
+	default:
+		if (flag)
+			*flag = '\0';
+		break;
+	}
+
 	switch (hlen) {
 	case 40: /* sha1 */
-	case 41: /* sha1 with cruft for a merge */
+		hlen_raw = 20;
 		break;
 	case 64: /* new hash */
-	case 65: /* new hash with cruft for a merge */
 		hlen_raw = 32;
 		break;
 	default:
@@ -89,9 +103,8 @@
 /* get the node hash and flags of a line as a tuple */
 static PyObject *hashflags(line *l)
 {
-	char *s = l->start;
-	Py_ssize_t plen = pathlen(l);
-	PyObject *hash = nodeof(l);
+	char flag;
+	PyObject *hash = nodeof(l, &flag);
 	ssize_t hlen;
 	Py_ssize_t hplen, flen;
 	PyObject *flags;
@@ -99,14 +112,7 @@
 
 	if (!hash)
 		return NULL;
-	/* hash is either 20 or 21 bytes for an old hash, so we use a
-	   ternary here to get the "real" hexlified sha length. */
-	hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
-	/* 1 for null byte, 1 for newline */
-	hplen = plen + hlen + 2;
-	flen = l->len - hplen;
-
-	flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
+	flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
 	if (!flags) {
 		Py_DECREF(hash);
 		return NULL;
@@ -299,7 +305,7 @@
 	}
 	pl = pathlen(l);
 	path = PyBytes_FromStringAndSize(l->start, pl);
-	hash = nodeof(l);
+	hash = nodeof(l, NULL);
 	if (!path || !hash) {
 		goto done;
 	}



To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20200706/be209513/attachment-0001.html>


More information about the Mercurial-patches mailing list