D11090: dirstate-v2: Rename Header to Root, move it to the end of the data file
SimonSapin
phabricator at mercurial-scm.org
Mon Jul 12 21:10:59 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
Now that they don’t have to be at the start, a given data file may contain
multiple "roots". A docket only points to one of them, and previous ones
are left unused to allow append-only in-place writing to an existing
data file.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11090
AFFECTED FILES
rust/hg-core/src/dirstate_tree/on_disk.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -2,8 +2,17 @@
//!
//! # File format
//!
-//! The file starts with a fixed-sized header, whose layout is defined by the
-//! `Header` struct. Its `root` field contains the slice (offset and length) to
+//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
+//! with a fixed-sized header whose layout is defined by the `DocketHeader`
+//! struct, followed by the data file identifier.
+//!
+//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
+//! file may be longer than the size given in the docket, but not shorter. Only
+//! the start of the data file up to the given size is considered. The
+//! fixed-size "root" of the dirstate tree whose layout is defined by the
+//! `Root` struct is found at the end of that slice of data.
+//!
+//! Its `root_nodes` field contains the slice (offset and length) to
//! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
@@ -56,8 +65,8 @@
#[derive(BytesCast)]
#[repr(C)]
-struct Header {
- root: ChildNodes,
+struct Root {
+ root_nodes: ChildNodes,
nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
@@ -119,7 +128,7 @@
/// - All direct children of this directory (as returned by
/// `std::fs::read_dir`) either have a corresponding dirstate node, or
/// are ignored by ignore patterns whose hash is in
- /// `Header::ignore_patterns_hash`.
+ /// `Root::ignore_patterns_hash`.
///
/// This means that if `std::fs::symlink_metadata` later reports the
/// same modification time and ignored patterns haven’t changed, a run
@@ -190,7 +199,7 @@
/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
- let _ = std::mem::transmute::<Header, [u8; 36]>;
+ let _ = std::mem::transmute::<Root, [u8; 36]>;
let _ = std::mem::transmute::<Node, [u8; 49]>;
}
@@ -253,19 +262,23 @@
if on_disk.is_empty() {
return Ok(DirstateMap::empty(on_disk));
}
- let (header, _) =
- Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+ // Find the `Root` at the end of the given slice
+ let root_offset = on_disk
+ .len()
+ .checked_sub(std::mem::size_of::<Root>())
+ // A non-empty slice too short is an error
+ .ok_or(DirstateV2ParseError)?;
+ let (root, _) = Root::from_bytes(&on_disk[root_offset..])
+ .map_err(|_| DirstateV2ParseError)?;
let dirstate_map = DirstateMap {
on_disk,
root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
on_disk,
- header.root,
+ root.root_nodes,
)?),
- nodes_with_entry_count: header.nodes_with_entry_count.get(),
- nodes_with_copy_source_count: header
- .nodes_with_copy_source_count
- .get(),
- ignore_patterns_hash: header.ignore_patterns_hash,
+ nodes_with_entry_count: root.nodes_with_entry_count.get(),
+ nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
+ ignore_patterns_hash: root.ignore_patterns_hash,
};
Ok(dirstate_map)
}
@@ -491,8 +504,8 @@
on_disk: &'on_disk [u8],
mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
- let (header, _) =
- Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+ let (root, _) =
+ Root::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
fn recur<'on_disk>(
on_disk: &'on_disk [u8],
nodes: Slice,
@@ -508,37 +521,33 @@
}
Ok(())
}
- recur(on_disk, header.root, &mut f)
+ recur(on_disk, root.root_nodes, &mut f)
}
pub(super) fn write(
dirstate_map: &mut DirstateMap,
) -> Result<Vec<u8>, DirstateError> {
- let header_len = std::mem::size_of::<Header>();
+ let root_len = std::mem::size_of::<Root>();
// This ignores the space for paths, and for nodes without an entry.
// TODO: better estimate? Skip the `Vec` and write to a file directly?
- let size_guess = header_len
+ let size_guess = root_len
+ std::mem::size_of::<Node>()
* dirstate_map.nodes_with_entry_count as usize;
let mut out = Vec::with_capacity(size_guess);
- // Keep space for the header. We’ll fill it out at the end when we know the
- // actual offset for the root nodes.
- out.resize(header_len, 0_u8);
-
- let root =
+ let root_nodes =
write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
- let header = Header {
- root,
+ let root = Root {
+ root_nodes,
nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
nodes_with_copy_source_count: dirstate_map
.nodes_with_copy_source_count
.into(),
ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
};
- out[..header_len].copy_from_slice(header.as_bytes());
+ out.extend(root.as_bytes());
Ok(out)
}
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list