[Commented On] D11097: dirstate-v2: Add heuristic for when to create a new data file
baymax (Baymax, Your Personal Patch-care Companion)
phabricator at mercurial-scm.org
Fri Jul 16 09:39:37 UTC 2021
baymax added a comment.
baymax updated this revision to Diff 29322.
✅ refresh by Heptapod after a successful CI run (🐙 💚)
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D11097?vs=29292&id=29322
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D11097/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D11097
AFFECTED FILES
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -73,6 +73,9 @@
nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
+ /// How many bytes of this data file are not used anymore
+ unreachable_bytes: Size,
+
/// If non-zero, a hash of ignore files that were used for some previous
/// run of the `status` algorithm.
///
@@ -205,7 +208,7 @@
/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
- let _ = std::mem::transmute::<Root, [u8; 36]>;
+ let _ = std::mem::transmute::<Root, [u8; 40]>;
let _ = std::mem::transmute::<Node, [u8; 43]>;
}
@@ -283,6 +286,9 @@
return Ok(DirstateMap::empty(on_disk));
}
let root = read_root(on_disk)?;
+ let mut unreachable_bytes = root.unreachable_bytes.get();
+ // Each append writes a new `Root`, so it’s never reused
+ unreachable_bytes += std::mem::size_of::<Root>() as u32;
let dirstate_map = DirstateMap {
on_disk,
root: dirstate_map::ChildNodes::OnDisk(read_nodes(
@@ -292,6 +298,7 @@
nodes_with_entry_count: root.nodes_with_entry_count.get(),
nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
ignore_patterns_hash: root.ignore_patterns_hash,
+ unreachable_bytes,
};
Ok(dirstate_map)
}
@@ -573,6 +580,7 @@
nodes_with_copy_source_count: dirstate_map
.nodes_with_copy_source_count
.into(),
+ unreachable_bytes: dirstate_map.unreachable_bytes.into(),
ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
};
writer.out.extend(root.as_bytes());
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -44,6 +44,9 @@
/// See on_disk::Header
pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
+
+ /// How many bytes of `on_disk` are not used anymore
+ pub(super) unreachable_bytes: u32,
}
/// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -118,9 +121,10 @@
}
}
- pub(super) fn make_mut(
+ fn make_mut(
&mut self,
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut u32,
) -> Result<
&mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
DirstateV2ParseError,
@@ -128,6 +132,8 @@
match self {
ChildNodes::InMemory(nodes) => Ok(nodes),
ChildNodes::OnDisk(nodes) => {
+ *unreachable_bytes +=
+ std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
let nodes = nodes
.iter()
.map(|node| {
@@ -406,6 +412,7 @@
nodes_with_entry_count: 0,
nodes_with_copy_source_count: 0,
ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
+ unreachable_bytes: 0,
}
}
@@ -436,6 +443,7 @@
let tracked = entry.state.is_tracked();
let node = Self::get_or_insert_node(
map.on_disk,
+ &mut map.unreachable_bytes,
&mut map.root,
path,
WithBasename::to_cow_borrowed,
@@ -472,18 +480,8 @@
/// append to the existing data file that contains `self.on_disk` (true),
/// or create a new data file from scratch (false).
pub(super) fn write_should_append(&self) -> bool {
- // Soon this will be a heuristic based on the amount of unreachable
- // data. For now it’s pseudo-random in order to make tests exercise
- // both code paths.
-
- fn bad_rng() -> u32 {
- std::time::SystemTime::now()
- .duration_since(std::time::UNIX_EPOCH)
- .unwrap()
- .subsec_millis()
- }
-
- bad_rng() % 2 == 0
+ // Append if more than half of the existing data is still relevant
+ self.unreachable_bytes as usize <= (self.on_disk.len() / 2)
}
fn get_node<'tree>(
@@ -514,6 +512,7 @@
/// other fields while the returned borrow is still valid
fn get_node_mut<'tree>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut u32,
root: &'tree mut ChildNodes<'on_disk>,
path: &HgPath,
) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
@@ -522,7 +521,9 @@
let mut component =
components.next().expect("expected at least one components");
loop {
- if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
+ if let Some(child) = children
+ .make_mut(on_disk, unreachable_bytes)?
+ .get_mut(component)
{
if let Some(next_component) = components.next() {
component = next_component;
@@ -542,6 +543,7 @@
) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path,
WithBasename::to_cow_owned,
@@ -549,8 +551,9 @@
)
}
- pub(super) fn get_or_insert_node<'tree, 'path>(
+ fn get_or_insert_node<'tree, 'path>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut u32,
root: &'tree mut ChildNodes<'on_disk>,
path: &'path HgPath,
to_cow: impl Fn(
@@ -569,7 +572,7 @@
// map already contains that key, without introducing double
// lookup?
let child_node = child_nodes
- .make_mut(on_disk)?
+ .make_mut(on_disk, unreachable_bytes)?
.entry(to_cow(ancestor_path))
.or_default();
if let Some(next) = inclusive_ancestor_paths.next() {
@@ -598,6 +601,7 @@
let node = Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path,
WithBasename::to_cow_owned,
@@ -681,6 +685,7 @@
for path in paths {
if let Some(node) = Self::get_node_mut(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path.as_ref(),
)? {
@@ -712,6 +717,12 @@
Ok(None)
})
}
+
+ fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
+ if let Cow::Borrowed(path) = path {
+ *unreachable_bytes += path.len() as u32
+ }
+ }
}
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
@@ -844,13 +855,14 @@
/// removed a node in `nodes`.
fn recur<'on_disk>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut u32,
nodes: &mut ChildNodes<'on_disk>,
path: &HgPath,
) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
let (first_path_component, rest_of_path) =
path.split_first_component();
- let node = if let Some(node) =
- nodes.make_mut(on_disk)?.get_mut(first_path_component)
+ let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
+ let node = if let Some(node) = nodes.get_mut(first_path_component)
{
node
} else {
@@ -858,9 +870,12 @@
};
let dropped;
if let Some(rest) = rest_of_path {
- if let Some((d, removed)) =
- recur(on_disk, &mut node.children, rest)?
- {
+ if let Some((d, removed)) = recur(
+ on_disk,
+ unreachable_bytes,
+ &mut node.children,
+ rest,
+ )? {
dropped = d;
if dropped.had_entry {
node.descendants_with_entry_count -= 1;
@@ -884,6 +899,9 @@
if had_entry {
node.data = NodeData::None
}
+ if let Some(source) = &node.copy_source {
+ DirstateMap::count_dropped_path(unreachable_bytes, source)
+ }
dropped = Dropped {
was_tracked: node
.data
@@ -899,14 +917,22 @@
&& node.copy_source.is_none()
&& node.children.is_empty();
if remove {
- nodes.make_mut(on_disk)?.remove(first_path_component);
+ let (key, _) =
+ nodes.remove_entry(first_path_component).unwrap();
+ DirstateMap::count_dropped_path(
+ unreachable_bytes,
+ key.full_path(),
+ )
}
Ok(Some((dropped, remove)))
}
- if let Some((dropped, _removed)) =
- recur(self.on_disk, &mut self.root, filename)?
- {
+ if let Some((dropped, _removed)) = recur(
+ self.on_disk,
+ &mut self.unreachable_bytes,
+ &mut self.root,
+ filename,
+ )? {
if dropped.had_entry {
self.nodes_with_entry_count -= 1
}
@@ -926,9 +952,12 @@
now: i32,
) -> Result<(), DirstateV2ParseError> {
for filename in filenames {
- if let Some(node) =
- Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
- {
+ if let Some(node) = Self::get_node_mut(
+ self.on_disk,
+ &mut self.unreachable_bytes,
+ &mut self.root,
+ &filename,
+ )? {
if let NodeData::Entry(entry) = &mut node.data {
entry.clear_ambiguous_mtime(now);
}
@@ -1144,16 +1173,20 @@
key: &HgPath,
) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
let count = &mut self.nodes_with_copy_source_count;
- Ok(
- Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
- |node| {
- if node.copy_source.is_some() {
- *count -= 1
- }
- node.copy_source.take().map(Cow::into_owned)
- },
- ),
- )
+ let unreachable_bytes = &mut self.unreachable_bytes;
+ Ok(Self::get_node_mut(
+ self.on_disk,
+ unreachable_bytes,
+ &mut self.root,
+ key,
+ )?
+ .and_then(|node| {
+ if let Some(source) = &node.copy_source {
+ *count -= 1;
+ Self::count_dropped_path(unreachable_bytes, source);
+ }
+ node.copy_source.take().map(Cow::into_owned)
+ }))
}
fn copy_map_insert(
@@ -1163,6 +1196,7 @@
) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
let node = Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
&key,
WithBasename::to_cow_owned,
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210716/2ac7d7d6/attachment-0002.html>
More information about the Mercurial-patches
mailing list