D10005: rust: Rewrite dirstate parsing using the `bytes-cast` crate
SimonSapin
phabricator at mercurial-scm.org
Wed Feb 17 12:59:17 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10005
AFFECTED FILES
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate/dirstate_map.rs
rust/hg-core/src/dirstate/parsers.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -6,13 +6,13 @@
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::{
- dirstate::{CopyMap, EntryState, StateMap},
+ dirstate::{CopyMap, EntryState, RawEntry, StateMap},
DirstateEntry, DirstateParents,
};
-use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use byteorder::{BigEndian, WriteBytesExt};
+use bytes_cast::BytesCast;
use micro_timer::timed;
use std::convert::{TryFrom, TryInto};
-use std::io::Cursor;
use std::time::Duration;
/// Parents are stored in the dirstate as byte hashes.
@@ -21,65 +21,45 @@
const MIN_ENTRY_SIZE: usize = 17;
type ParseResult<'a> = (
- DirstateParents,
+ &'a DirstateParents,
Vec<(&'a HgPath, DirstateEntry)>,
Vec<(&'a HgPath, &'a HgPath)>,
);
#[timed]
-pub fn parse_dirstate(contents: &[u8]) -> Result<ParseResult, HgError> {
- if contents.len() < PARENT_SIZE * 2 {
- return Err(HgError::corrupted("Too little data for dirstate."));
- }
- let mut copies = vec![];
- let mut entries = vec![];
+pub fn parse_dirstate(mut contents: &[u8]) -> Result<ParseResult, HgError> {
+ let mut copies = Vec::new();
+ let mut entries = Vec::new();
- let mut curr_pos = PARENT_SIZE * 2;
- let parents = DirstateParents {
- p1: contents[..PARENT_SIZE].try_into().unwrap(),
- p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
- };
+ let (parents, rest) = DirstateParents::from_bytes(contents)
+ .map_err(|_| HgError::corrupted("Too little data for dirstate."))?;
+ contents = rest;
+ while !contents.is_empty() {
+ let (raw_entry, rest) = RawEntry::from_bytes(contents)
+ .map_err(|_| HgError::corrupted("Overflow in dirstate."))?;
- while curr_pos < contents.len() {
- if curr_pos + MIN_ENTRY_SIZE > contents.len() {
- return Err(HgError::corrupted("Overflow in dirstate."));
- }
- let entry_bytes = &contents[curr_pos..];
+ let entry = DirstateEntry {
+ state: EntryState::try_from(raw_entry.state)?,
+ mode: raw_entry.mode.get(),
+ mtime: raw_entry.mtime.get(),
+ size: raw_entry.size.get(),
+ };
+ let (paths, rest) =
+ u8::slice_from_bytes(rest, raw_entry.length.get() as usize)
+ .map_err(|_| HgError::corrupted("Overflow in dirstate."))?;
- let mut cursor = Cursor::new(entry_bytes);
- // Unwraping errors from `byteorder` as we’ve already checked
- // `MIN_ENTRY_SIZE` so the input should never be too short.
- let state = EntryState::try_from(cursor.read_u8().unwrap())?;
- let mode = cursor.read_i32::<BigEndian>().unwrap();
- let size = cursor.read_i32::<BigEndian>().unwrap();
- let mtime = cursor.read_i32::<BigEndian>().unwrap();
- let path_len = cursor.read_i32::<BigEndian>().unwrap() as usize;
-
- if path_len > contents.len() - curr_pos {
- return Err(HgError::corrupted("Overflow in dirstate."));
+ // `paths` is either a single path, or two paths separated by a NULL
+ // byte
+ let mut iter = paths.splitn(2, |&byte| byte == b'\0');
+ let path = HgPath::new(
+ iter.next().expect("splitn always yields at least one item"),
+ );
+ if let Some(copy_source) = iter.next() {
+ copies.push((path, HgPath::new(copy_source)));
}
- // Slice instead of allocating a Vec needed for `read_exact`
- let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
-
- let (path, copy) = match memchr::memchr(0, path) {
- None => (path, None),
- Some(i) => (&path[..i], Some(&path[(i + 1)..])),
- };
-
- if let Some(copy_path) = copy {
- copies.push((HgPath::new(path), HgPath::new(copy_path)));
- };
- entries.push((
- HgPath::new(path),
- DirstateEntry {
- state,
- mode,
- size,
- mtime,
- },
- ));
- curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
+ entries.push((path, entry));
+ contents = rest;
}
Ok((parents, entries, copies))
}
@@ -374,7 +354,7 @@
.collect();
assert_eq!(
- (parents, state_map, copymap),
+ (&parents, state_map, copymap),
(new_parents, new_state_map, new_copy_map)
)
}
@@ -452,7 +432,7 @@
.collect();
assert_eq!(
- (parents, state_map, copymap),
+ (&parents, state_map, copymap),
(new_parents, new_state_map, new_copy_map)
)
}
@@ -499,7 +479,7 @@
assert_eq!(
(
- parents,
+ &parents,
[(
HgPathBuf::from_bytes(b"f1"),
DirstateEntry {
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -386,10 +386,10 @@
}
#[timed]
- pub fn read(
+ pub fn read<'a>(
&mut self,
- file_contents: &[u8],
- ) -> Result<Option<DirstateParents>, DirstateError> {
+ file_contents: &'a [u8],
+ ) -> Result<Option<&'a DirstateParents>, DirstateError> {
if file_contents.is_empty() {
return Ok(None);
}
diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs
--- a/rust/hg-core/src/dirstate.rs
+++ b/rust/hg-core/src/dirstate.rs
@@ -7,6 +7,7 @@
use crate::errors::HgError;
use crate::{utils::hg_path::HgPathBuf, FastHashMap};
+use bytes_cast::{unaligned, BytesCast};
use std::collections::hash_map;
use std::convert::TryFrom;
@@ -17,7 +18,8 @@
pub mod parsers;
pub mod status;
-#[derive(Debug, PartialEq, Clone)]
+#[derive(Debug, PartialEq, Clone, BytesCast)]
+#[repr(C)]
pub struct DirstateParents {
pub p1: [u8; 20],
pub p2: [u8; 20],
@@ -34,6 +36,16 @@
pub size: i32,
}
+#[derive(BytesCast)]
+#[repr(C)]
+struct RawEntry {
+ state: u8,
+ mode: unaligned::I32Be,
+ size: unaligned::I32Be,
+ mtime: unaligned::I32Be,
+ length: unaligned::I32Be,
+}
+
/// A `DirstateEntry` with a size of `-2` means that it was merged from the
/// other parent. This allows revert to pick the right status back during a
/// merge.
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list