[Request] [++- ] D11881: rhg: centralize index header parsing
aalekseyev (Arseniy Alekseyev)
phabricator at mercurial-scm.org
Tue Dec 7 18:58:20 UTC 2021
aalekseyev created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
Centralize index header parsing, parse the generaldelta flag,
and leave breadcrumbs to relate the code to Python.
REPOSITORY
rHG Mercurial
BRANCH
stable
REVISION DETAIL
https://phab.mercurial-scm.org/D11881
AFFECTED FILES
rust/hg-core/src/revlog/index.rs
rust/hg-core/src/revlog/revlog.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -3,7 +3,6 @@
use std::ops::Deref;
use std::path::Path;
-use byteorder::{BigEndian, ByteOrder};
use flate2::read::ZlibDecoder;
use micro_timer::timed;
use sha1::{Digest, Sha1};
@@ -74,13 +73,6 @@
match repo.store_vfs().mmap_open_opt(&index_path)? {
None => Index::new(Box::new(vec![])),
Some(index_mmap) => {
- let version = get_version(&index_mmap)?;
- if version != 1 {
- // A proper new version should have had a repo/store
- // requirement.
- return Err(HgError::corrupted("corrupted revlog"));
- }
-
let index = Index::new(Box::new(index_mmap))?;
Ok(index)
}
@@ -387,19 +379,6 @@
}
}
-/// Format version of the revlog.
-pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
- if index_bytes.len() == 0 {
- return Ok(1);
- };
- if index_bytes.len() < 4 {
- return Err(HgError::corrupted(
- "corrupted revlog: can't read the index format header",
- ));
- };
- Ok(BigEndian::read_u16(&index_bytes[2..=3]))
-}
-
/// Calculate the hash of a revision given its data and its parents.
fn hash(
data: &[u8],
diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs
+++ b/rust/hg-core/src/revlog/index.rs
@@ -9,6 +9,76 @@
pub const INDEX_ENTRY_SIZE: usize = 64;
+pub struct IndexHeader {
+ header_bytes: [u8; 4],
+}
+
+#[derive(Copy, Clone)]
+pub struct IndexHeaderFlags {
+ flags: u16,
+}
+
+// Corresponds to the high bits of `_format_flags` in python
+impl IndexHeaderFlags {
+ // Corresponds to FLAG_INLINE_DATA in python
+ pub fn is_inline(self) -> bool {
+ return self.flags & 1 != 0;
+ }
+ // Corresponds to FLAG_GENERALDELTA in python
+ pub fn uses_generaldelta(self) -> bool {
+ return self.flags & 2 != 0;
+ }
+}
+
+// Corresponds to the INDEX_HEADER structure,
+// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
+impl IndexHeader {
+ fn format_flags(&self) -> IndexHeaderFlags {
+ // No "unknown flags" check here, unlike in python. Maybe there should
+ // be.
+ return IndexHeaderFlags {
+ flags: BigEndian::read_u16(&self.header_bytes[0..2]),
+ };
+ }
+
+ // The only revlog version currently supported by rhg.
+ const REVLOGV1: u16 = 1;
+
+ // Corresponds to `_format_version` in Python.
+ // The only currently supported version is `REVLOGV1`.
+ fn format_version(&self) -> u16 {
+ return BigEndian::read_u16(&self.header_bytes[2..4]);
+ }
+
+ const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
+ // We treat an empty file as a valid index with no entries.
+ // Here we make an arbitrary choice of what we assume the format of the
+ // index to be (V1, using generaldelta).
+ // This doesn't matter too much, since we're only doing read-only
+ // access, but the value corresponds to the `new_header` variable in
+ // `revlog.py`, `_loadindex`.
+ header_bytes: [0, 3, 0, 1],
+ };
+
+ fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
+ if index_bytes.len() == 0 {
+ return Ok(IndexHeader::EMPTY_INDEX_HEADER);
+ }
+ if index_bytes.len() < 4 {
+ return Err(HgError::corrupted(
+ "corrupted revlog: can't read the index format header",
+ ));
+ }
+ return Ok(IndexHeader {
+ header_bytes: {
+ let bytes: [u8; 4] =
+ index_bytes[0..4].try_into().expect("impossible");
+ bytes
+ },
+ });
+ }
+}
+
/// A Revlog index
pub struct Index {
bytes: Box<dyn Deref<Target = [u8]> + Send>,
@@ -23,7 +93,15 @@
pub fn new(
bytes: Box<dyn Deref<Target = [u8]> + Send>,
) -> Result<Self, HgError> {
- if is_inline(&bytes) {
+ let header = IndexHeader::parse(bytes.as_ref())?;
+
+ if header.format_version() != IndexHeader::REVLOGV1 {
+ // A proper new version should have had a repo/store
+ // requirement.
+ return Err(HgError::corrupted("unsupported revlog version"));
+ }
+
+ if header.format_flags().is_inline() {
let mut offset: usize = 0;
let mut offsets = Vec::new();
@@ -206,17 +284,6 @@
}
}
-/// Value of the inline flag.
-pub fn is_inline(index_bytes: &[u8]) -> bool {
- if index_bytes.len() < 4 {
- return true;
- }
- match &index_bytes[0..=1] {
- [0, 0] | [0, 2] => false,
- _ => true,
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
To: aalekseyev, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20211207/6b7e7b62/attachment-0001.html>
More information about the Mercurial-patches
mailing list