[Updated] [++- ] D11882: rhg: fix a crash on non-generaldelta revlogs

aalekseyev (Arseniy Alekseyev) phabricator at mercurial-scm.org
Wed Dec 8 17:28:20 UTC 2021


aalekseyev updated this revision to Diff 31397.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11882?vs=31395&id=31397

BRANCH
  stable

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11882/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11882

AFFECTED FILES
  rust/hg-core/src/revlog/index.rs
  rust/hg-core/src/revlog/revlog.rs
  tests/test-rhg-no-generaldelta.t

CHANGE DETAILS

diff --git a/tests/test-rhg-no-generaldelta.t b/tests/test-rhg-no-generaldelta.t
--- a/tests/test-rhg-no-generaldelta.t
+++ b/tests/test-rhg-no-generaldelta.t
@@ -21,9 +21,7 @@
   1 0 prev
   2 1 prev
 
-rhg breaks on non-generaldelta revlogs:
+rhg works on non-generaldelta revlogs:
 
   $ $NO_FALLBACK hg cat f -r . | f --sha256 --size
-  abort: corrupted revlog (rhg !)
-  size=0, sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 (rhg !)
-  size=58, sha256=0cf0386dd4813cc3b957ea790146627dfc0ec42ad3fcf47221b9842e4d5764c1 (no-rhg !)
+  size=58, sha256=0cf0386dd4813cc3b957ea790146627dfc0ec42ad3fcf47221b9842e4d5764c1
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -191,11 +191,20 @@
         // Todo return -> Cow
         let mut entry = self.get_entry(rev)?;
         let mut delta_chain = vec![];
-        while let Some(base_rev) = entry.base_rev {
-            delta_chain.push(entry);
-            entry = self
-                .get_entry(base_rev)
-                .map_err(|_| RevlogError::corrupted())?;
+
+        if self.index.uses_generaldelta() {
+            while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
+                delta_chain.push(entry);
+                entry = self.get_entry_internal(base_rev)?;
+            }
+        } else {
+            if let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
+                delta_chain.push(entry);
+                entry = self.get_entry_internal(base_rev)?;
+                for rev in (base_rev + 1..rev).rev() {
+                    delta_chain.push(self.get_entry_internal(rev)?);
+                }
+            }
         }
 
         // TODO do not look twice in the index
@@ -291,14 +300,26 @@
             bytes: data,
             compressed_len: index_entry.compressed_len(),
             uncompressed_len: index_entry.uncompressed_len(),
-            base_rev: if index_entry.base_revision() == rev {
+            base_rev_or_base_of_delta_chain: if index_entry
+                .base_revision_or_base_of_delta_chain()
+                == rev
+            {
                 None
             } else {
-                Some(index_entry.base_revision())
+                Some(index_entry.base_revision_or_base_of_delta_chain())
             },
         };
         Ok(entry)
     }
+
+    // when resolving internal references within revlog, any errors
+    // should be reported as corruption, instead of e.g. "invalid revision"
+    fn get_entry_internal(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+    }
 }
 
 /// The revlog entry's bytes and the necessary informations to extract
@@ -309,7 +330,7 @@
     bytes: &'a [u8],
     compressed_len: usize,
     uncompressed_len: usize,
-    base_rev: Option<Revision>,
+    base_rev_or_base_of_delta_chain: Option<Revision>,
 }
 
 impl<'a> RevlogEntry<'a> {
@@ -375,7 +396,7 @@
     /// Tell if the entry is a snapshot or a delta
     /// (influences on decompression).
     fn is_delta(&self) -> bool {
-        self.base_rev.is_some()
+        self.base_rev_or_base_of_delta_chain.is_some()
     }
 }
 
diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs
+++ b/rust/hg-core/src/revlog/index.rs
@@ -18,20 +18,20 @@
     flags: u16,
 }
 
-// Corresponds to the high bits of `_format_flags` in python
+/// Corresponds to the high bits of `_format_flags` in python
 impl IndexHeaderFlags {
-    // Corresponds to FLAG_INLINE_DATA in python
+    /// Corresponds to FLAG_INLINE_DATA in python
     pub fn is_inline(self) -> bool {
         return self.flags & 1 != 0;
     }
-    // Corresponds to FLAG_GENERALDELTA in python
+    /// Corresponds to FLAG_GENERALDELTA in python
     pub fn uses_generaldelta(self) -> bool {
         return self.flags & 2 != 0;
     }
 }
 
-// Corresponds to the INDEX_HEADER structure,
-// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
+/// Corresponds to the INDEX_HEADER structure,
+/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
 impl IndexHeader {
     fn format_flags(&self) -> IndexHeaderFlags {
         // No "unknown flags" check here, unlike in python. Maybe there should
@@ -84,6 +84,7 @@
     /// Offsets of starts of index blocks.
     /// Only needed when the index is interleaved with data.
     offsets: Option<Vec<usize>>,
+    uses_generaldelta: bool,
 }
 
 impl Index {
@@ -100,6 +101,11 @@
             return Err(HgError::corrupted("unsupported revlog version"));
         }
 
+        // This is only correct because we know version is REVLOGV1.
+        // In v2 we always use generaldelta, while in v0 we never use
+        // generaldelta. Similar for [is_inline] (it's only used in v1).
+        let uses_generaldelta = header.format_flags().uses_generaldelta();
+
         if header.format_flags().is_inline() {
             let mut offset: usize = 0;
             let mut offsets = Vec::new();
@@ -119,6 +125,7 @@
                 Ok(Self {
                     bytes,
                     offsets: Some(offsets),
+                    uses_generaldelta,
                 })
             } else {
                 Err(HgError::corrupted("unexpected inline revlog length")
@@ -128,10 +135,15 @@
             Ok(Self {
                 bytes,
                 offsets: None,
+                uses_generaldelta,
             })
         }
     }
 
+    pub fn uses_generaldelta(&self) -> bool {
+        self.uses_generaldelta
+    }
+
     /// Value of the inline flag.
     pub fn is_inline(&self) -> bool {
         self.offsets.is_some()
@@ -259,7 +271,7 @@
     }
 
     /// Return the revision upon which the data has been derived.
-    pub fn base_revision(&self) -> Revision {
+    pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
         // TODO Maybe return an Option when base_revision == rev?
         //      Requires to add rev to IndexEntry
 
@@ -297,7 +309,7 @@
         offset: usize,
         compressed_len: usize,
         uncompressed_len: usize,
-        base_revision: Revision,
+        base_revision_or_base_of_delta_chain: Revision,
     }
 
     #[cfg(test)]
@@ -311,7 +323,7 @@
                 offset: 0,
                 compressed_len: 0,
                 uncompressed_len: 0,
-                base_revision: 0,
+                base_revision_or_base_of_delta_chain: 0,
             }
         }
 
@@ -350,8 +362,11 @@
             self
         }
 
-        pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
-            self.base_revision = value;
+        pub fn with_base_revision_or_base_of_delta_chain(
+            &mut self,
+            value: Revision,
+        ) -> &mut Self {
+            self.base_revision_or_base_of_delta_chain = value;
             self
         }
 
@@ -374,7 +389,9 @@
             bytes.extend(&[0u8; 2]); // Revision flags.
             bytes.extend(&(self.compressed_len as u32).to_be_bytes());
             bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
-            bytes.extend(&self.base_revision.to_be_bytes());
+            bytes.extend(
+                &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
+            );
             bytes
         }
     }
@@ -480,14 +497,16 @@
     }
 
     #[test]
-    fn test_base_revision() {
-        let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
+    fn test_base_revision_or_base_of_delta_chain() {
+        let bytes = IndexEntryBuilder::new()
+            .with_base_revision_or_base_of_delta_chain(1)
+            .build();
         let entry = IndexEntry {
             bytes: &bytes,
             offset_override: None,
         };
 
-        assert_eq!(entry.base_revision(), 1)
+        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
     }
 
     #[test]



To: aalekseyev, #hg-reviewers
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211208/f41ce75b/attachment-0002.html>


More information about the Mercurial-patches mailing list