D10068: copies-rust: rewrite ChangedFiles binary parsing

SimonSapin phabricator at mercurial-scm.org
Thu Feb 25 10:32:24 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  by using the new from-bytes-safe crate and a custom struct
  that encodes the expected data structure.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10068

AFFECTED FILES
  rust/hg-core/src/copy_tracing.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs
+++ b/rust/hg-core/src/copy_tracing.rs
@@ -3,13 +3,13 @@
 use crate::Revision;
 use crate::NULL_REVISION;
 
+use bytes_cast::{unaligned, BytesCast};
 use im_rc::ordmap::Entry;
 use im_rc::ordmap::OrdMap;
 use im_rc::OrdSet;
 
 use std::cmp::Ordering;
 use std::collections::HashMap;
-use std::convert::TryInto;
 
 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
 
@@ -110,18 +110,6 @@
 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
 type InternalPathCopies = OrdMap<PathToken, CopySource>;
 
-/// represent the files affected by a changesets
-///
-/// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
-/// all the data categories tracked by it.
-/// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
-/// all the data categories tracked by it.
-pub struct ChangedFiles<'a> {
-    nb_items: u32,
-    index: &'a [u8],
-    data: &'a [u8],
-}
-
 /// Represent active changes that affect the copy tracing.
 enum Action<'a> {
     /// The parent ? children edge is removing a file
@@ -148,9 +136,6 @@
     Normal,
 }
 
-type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
-
-const EMPTY: &[u8] = b"";
 const COPY_MASK: u8 = 3;
 const P1_COPY: u8 = 2;
 const P2_COPY: u8 = 3;
@@ -159,141 +144,94 @@
 const MERGED: u8 = 8;
 const SALVAGED: u8 = 16;
 
-impl<'a> ChangedFiles<'a> {
-    const INDEX_START: usize = 4;
-    const ENTRY_SIZE: u32 = 9;
-    const FILENAME_START: u32 = 1;
-    const COPY_SOURCE_START: u32 = 5;
+#[derive(BytesCast)]
+#[repr(C)]
+struct ChangedFilesIndexEntry {
+    flags: u8,
 
-    pub fn new(data: &'a [u8]) -> Self {
-        assert!(
-            data.len() >= 4,
-            "data size ({}) is too small to contain the header (4)",
-            data.len()
-        );
-        let nb_items_raw: [u8; 4] = (&data[0..=3])
-            .try_into()
-            .expect("failed to turn 4 bytes into 4 bytes");
-        let nb_items = u32::from_be_bytes(nb_items_raw);
+    /// Only the end position is stored. The start is at the end of the
+    /// previous entry.
+    destination_path_end_position: unaligned::U32Be,
 
-        let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
-        let index_end = Self::INDEX_START + index_size;
+    source_index_entry_position: unaligned::U32Be,
+}
+
+fn _static_assert_size_of() {
+    let _ = std::mem::transmute::<ChangedFilesIndexEntry, [u8; 9]>;
+}
 
-        assert!(
-            data.len() >= index_end,
-            "data size ({}) is too small to fit the index_data ({})",
-            data.len(),
-            index_end
-        );
+/// Represents the files affected by a changeset.
+///
+/// This hold a subset of `mercurial.metadata.ChangingFiles` as we do not need
+/// all the data categories tracked by it.
+pub struct ChangedFiles<'a> {
+    index: &'a [ChangedFilesIndexEntry],
+    paths: &'a [u8],
+}
 
-        let ret = ChangedFiles {
-            nb_items,
-            index: &data[Self::INDEX_START..index_end],
-            data: &data[index_end..],
-        };
-        let max_data = ret.filename_end(nb_items - 1) as usize;
-        assert!(
-            ret.data.len() >= max_data,
-            "data size ({}) is too small to fit all data ({})",
-            data.len(),
-            index_end + max_data
-        );
-        ret
+impl<'a> ChangedFiles<'a> {
+    pub fn new(data: &'a [u8]) -> Self {
+        let (header, rest) = unaligned::U32Be::from_bytes(data).unwrap();
+        let nb_index_entries = header.get() as usize;
+        let (index, paths) =
+            ChangedFilesIndexEntry::slice_from_bytes(rest, nb_index_entries)
+                .unwrap();
+        Self { index, paths }
     }
 
     pub fn new_empty() -> Self {
         ChangedFiles {
-            nb_items: 0,
-            index: EMPTY,
-            data: EMPTY,
+            index: &[],
+            paths: &[],
         }
     }
 
-    /// internal function to return an individual entry at a given index
-    fn entry(&'a self, idx: u32) -> FileChange<'a> {
-        if idx >= self.nb_items {
-            panic!(
-                "index for entry is higher that the number of file {} >= {}",
-                idx, self.nb_items
-            )
-        }
-        let flags = self.flags(idx);
-        let filename = self.filename(idx);
-        let copy_idx = self.copy_idx(idx);
-        let copy_source = self.filename(copy_idx);
-        (flags, filename, copy_source)
-    }
-
-    /// internal function to return the filename of the entry at a given index
-    fn filename(&self, idx: u32) -> &HgPath {
-        let filename_start;
-        if idx == 0 {
-            filename_start = 0;
+    /// Internal function to return the filename of the entry at a given index
+    fn path(&self, idx: usize) -> &HgPath {
+        let start = if idx == 0 {
+            0
         } else {
-            filename_start = self.filename_end(idx - 1)
-        }
-        let filename_end = self.filename_end(idx);
-        let filename_start = filename_start as usize;
-        let filename_end = filename_end as usize;
-        HgPath::new(&self.data[filename_start..filename_end])
-    }
-
-    /// internal function to return the flag field of the entry at a given
-    /// index
-    fn flags(&self, idx: u32) -> u8 {
-        let idx = idx as usize;
-        self.index[idx * (Self::ENTRY_SIZE as usize)]
-    }
-
-    /// internal function to return the end of a filename part at a given index
-    fn filename_end(&self, idx: u32) -> u32 {
-        let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
-        let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
-        let start = start as usize;
-        let end = end as usize;
-        let raw = (&self.index[start..end])
-            .try_into()
-            .expect("failed to turn 4 bytes into 4 bytes");
-        u32::from_be_bytes(raw)
-    }
-
-    /// internal function to return index of the copy source of the entry at a
-    /// given index
-    fn copy_idx(&self, idx: u32) -> u32 {
-        let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
-        let end = (idx + 1) * Self::ENTRY_SIZE;
-        let start = start as usize;
-        let end = end as usize;
-        let raw = (&self.index[start..end])
-            .try_into()
-            .expect("failed to turn 4 bytes into 4 bytes");
-        u32::from_be_bytes(raw)
+            self.index[idx - 1].destination_path_end_position.get() as usize
+        };
+        let end = self.index[idx].destination_path_end_position.get() as usize;
+        HgPath::new(&self.paths[start..end])
     }
 
     /// Return an iterator over all the `Action` in this instance.
-    fn iter_actions(&self) -> ActionsIterator {
-        ActionsIterator {
-            changes: &self,
-            current: 0,
-        }
+    fn iter_actions(&self) -> impl Iterator<Item = Action> {
+        self.index.iter().enumerate().flat_map(move |(idx, entry)| {
+            let path = self.path(idx);
+            if (entry.flags & ACTION_MASK) == REMOVED {
+                Some(Action::Removed(path))
+            } else if (entry.flags & COPY_MASK) == P1_COPY {
+                let source_idx =
+                    entry.source_index_entry_position.get() as usize;
+                Some(Action::CopiedFromP1(path, self.path(source_idx)))
+            } else if (entry.flags & COPY_MASK) == P2_COPY {
+                let source_idx =
+                    entry.source_index_entry_position.get() as usize;
+                Some(Action::CopiedFromP2(path, self.path(source_idx)))
+            } else {
+                None
+            }
+        })
     }
 
     /// return the MergeCase value associated with a filename
     fn get_merge_case(&self, path: &HgPath) -> MergeCase {
-        if self.nb_items == 0 {
+        if self.index.is_empty() {
             return MergeCase::Normal;
         }
         let mut low_part = 0;
-        let mut high_part = self.nb_items;
+        let mut high_part = self.index.len();
 
         while low_part < high_part {
             let cursor = (low_part + high_part - 1) / 2;
-            let (flags, filename, _source) = self.entry(cursor);
-            match path.cmp(filename) {
+            match path.cmp(self.path(cursor)) {
                 Ordering::Less => low_part = cursor + 1,
                 Ordering::Greater => high_part = cursor,
                 Ordering::Equal => {
-                    return match flags & ACTION_MASK {
+                    return match self.index[cursor].flags & ACTION_MASK {
                         MERGED => MergeCase::Merged,
                         SALVAGED => MergeCase::Salvaged,
                         _ => MergeCase::Normal,
@@ -305,32 +243,6 @@
     }
 }
 
-struct ActionsIterator<'a> {
-    changes: &'a ChangedFiles<'a>,
-    current: u32,
-}
-
-impl<'a> Iterator for ActionsIterator<'a> {
-    type Item = Action<'a>;
-
-    fn next(&mut self) -> Option<Action<'a>> {
-        while self.current < self.changes.nb_items {
-            let (flags, file, source) = self.changes.entry(self.current);
-            self.current += 1;
-            if (flags & ACTION_MASK) == REMOVED {
-                return Some(Action::Removed(file));
-            }
-            let copy = flags & COPY_MASK;
-            if copy == P1_COPY {
-                return Some(Action::CopiedFromP1(file, source));
-            } else if copy == P2_COPY {
-                return Some(Action::CopiedFromP2(file, source));
-            }
-        }
-        return None;
-    }
-}
-
 /// A small "tokenizer" responsible of turning full HgPath into lighter
 /// PathToken
 ///



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list