D11097: dirstate-v2: Add heuristic for when to a new data file
SimonSapin
phabricator at mercurial-scm.org
Thu Jul 15 15:29:00 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
⦠instead of appending to the existing one. This is based on keeping track
of how much of the existing data is not used anymore.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11097
AFFECTED FILES
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -283,6 +283,8 @@
return Ok(DirstateMap::empty(on_disk));
}
let root = read_root(on_disk)?;
+ // Each append writes a new `Root`, so itâs never reused
+ let unreachable_bytes = std::mem::size_of::<Root>();
let dirstate_map = DirstateMap {
on_disk,
root: dirstate_map::ChildNodes::OnDisk(read_nodes(
@@ -292,6 +294,7 @@
nodes_with_entry_count: root.nodes_with_entry_count.get(),
nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
ignore_patterns_hash: root.ignore_patterns_hash,
+ unreachable_bytes,
};
Ok(dirstate_map)
}
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -44,6 +44,9 @@
/// See on_disk::Header
pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
+
+ /// How many bytes of `on_disk` are not used anymore
+ pub(super) unreachable_bytes: usize,
}
/// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -118,9 +121,10 @@
}
}
- pub(super) fn make_mut(
+ fn make_mut(
&mut self,
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut usize,
) -> Result<
&mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
DirstateV2ParseError,
@@ -128,6 +132,8 @@
match self {
ChildNodes::InMemory(nodes) => Ok(nodes),
ChildNodes::OnDisk(nodes) => {
+ *unreachable_bytes +=
+ std::mem::size_of_val::<[on_disk::Node]>(nodes);
let nodes = nodes
.iter()
.map(|node| {
@@ -406,6 +412,7 @@
nodes_with_entry_count: 0,
nodes_with_copy_source_count: 0,
ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
+ unreachable_bytes: 0,
}
}
@@ -436,6 +443,7 @@
let tracked = entry.state.is_tracked();
let node = Self::get_or_insert_node(
map.on_disk,
+ &mut map.unreachable_bytes,
&mut map.root,
path,
WithBasename::to_cow_borrowed,
@@ -472,18 +480,8 @@
/// append to the existing data file that contains `self.on_disk` (true),
/// or create a new data file from scratch (false).
pub(super) fn write_should_append(&self) -> bool {
- // Soon this will be a heuristic based on the amount of unreachable
- // data. For now itâs pseudo-random in order to make tests exercise
- // both code paths.
-
- fn bad_rng() -> u32 {
- std::time::SystemTime::now()
- .duration_since(std::time::UNIX_EPOCH)
- .unwrap()
- .subsec_millis()
- }
-
- bad_rng() % 2 == 0
+ // Append if more than half of the existing data is still relevant
+ self.unreachable_bytes <= (self.on_disk.len() / 2)
}
fn get_node<'tree>(
@@ -514,6 +512,7 @@
/// other fields while the returned borrow is still valid
fn get_node_mut<'tree>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut usize,
root: &'tree mut ChildNodes<'on_disk>,
path: &HgPath,
) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
@@ -522,7 +521,9 @@
let mut component =
components.next().expect("expected at least one components");
loop {
- if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
+ if let Some(child) = children
+ .make_mut(on_disk, unreachable_bytes)?
+ .get_mut(component)
{
if let Some(next_component) = components.next() {
component = next_component;
@@ -542,6 +543,7 @@
) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path,
WithBasename::to_cow_owned,
@@ -549,8 +551,9 @@
)
}
- pub(super) fn get_or_insert_node<'tree, 'path>(
+ fn get_or_insert_node<'tree, 'path>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut usize,
root: &'tree mut ChildNodes<'on_disk>,
path: &'path HgPath,
to_cow: impl Fn(
@@ -569,7 +572,7 @@
// map already contains that key, without introducing double
// lookup?
let child_node = child_nodes
- .make_mut(on_disk)?
+ .make_mut(on_disk, unreachable_bytes)?
.entry(to_cow(ancestor_path))
.or_default();
if let Some(next) = inclusive_ancestor_paths.next() {
@@ -598,6 +601,7 @@
let node = Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path,
WithBasename::to_cow_owned,
@@ -681,6 +685,7 @@
for path in paths {
if let Some(node) = Self::get_node_mut(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
path.as_ref(),
)? {
@@ -712,6 +717,12 @@
Ok(None)
})
}
+
+ fn count_dropped_path(unreachable_bytes: &mut usize, path: &Cow<HgPath>) {
+ if let Cow::Borrowed(path) = path {
+ *unreachable_bytes += path.len()
+ }
+ }
}
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
@@ -844,13 +855,14 @@
/// removed a node in `nodes`.
fn recur<'on_disk>(
on_disk: &'on_disk [u8],
+ unreachable_bytes: &mut usize,
nodes: &mut ChildNodes<'on_disk>,
path: &HgPath,
) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
let (first_path_component, rest_of_path) =
path.split_first_component();
- let node = if let Some(node) =
- nodes.make_mut(on_disk)?.get_mut(first_path_component)
+ let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
+ let node = if let Some(node) = nodes.get_mut(first_path_component)
{
node
} else {
@@ -858,9 +870,12 @@
};
let dropped;
if let Some(rest) = rest_of_path {
- if let Some((d, removed)) =
- recur(on_disk, &mut node.children, rest)?
- {
+ if let Some((d, removed)) = recur(
+ on_disk,
+ unreachable_bytes,
+ &mut node.children,
+ rest,
+ )? {
dropped = d;
if dropped.had_entry {
node.descendants_with_entry_count -= 1;
@@ -884,6 +899,9 @@
if had_entry {
node.data = NodeData::None
}
+ if let Some(source) = &node.copy_source {
+ DirstateMap::count_dropped_path(unreachable_bytes, source)
+ }
dropped = Dropped {
was_tracked: node
.data
@@ -899,14 +917,22 @@
&& node.copy_source.is_none()
&& node.children.is_empty();
if remove {
- nodes.make_mut(on_disk)?.remove(first_path_component);
+ let (key, _) =
+ nodes.remove_entry(first_path_component).unwrap();
+ DirstateMap::count_dropped_path(
+ unreachable_bytes,
+ key.full_path(),
+ )
}
Ok(Some((dropped, remove)))
}
- if let Some((dropped, _removed)) =
- recur(self.on_disk, &mut self.root, filename)?
- {
+ if let Some((dropped, _removed)) = recur(
+ self.on_disk,
+ &mut self.unreachable_bytes,
+ &mut self.root,
+ filename,
+ )? {
if dropped.had_entry {
self.nodes_with_entry_count -= 1
}
@@ -926,9 +952,12 @@
now: i32,
) -> Result<(), DirstateV2ParseError> {
for filename in filenames {
- if let Some(node) =
- Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
- {
+ if let Some(node) = Self::get_node_mut(
+ self.on_disk,
+ &mut self.unreachable_bytes,
+ &mut self.root,
+ &filename,
+ )? {
if let NodeData::Entry(entry) = &mut node.data {
entry.clear_ambiguous_mtime(now);
}
@@ -1144,16 +1173,20 @@
key: &HgPath,
) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
let count = &mut self.nodes_with_copy_source_count;
- Ok(
- Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
- |node| {
- if node.copy_source.is_some() {
- *count -= 1
- }
- node.copy_source.take().map(Cow::into_owned)
- },
- ),
- )
+ let unreachable_bytes = &mut self.unreachable_bytes;
+ Ok(Self::get_node_mut(
+ self.on_disk,
+ unreachable_bytes,
+ &mut self.root,
+ key,
+ )?
+ .and_then(|node| {
+ if let Some(source) = &node.copy_source {
+ *count -= 1;
+ Self::count_dropped_path(unreachable_bytes, source);
+ }
+ node.copy_source.take().map(Cow::into_owned)
+ }))
}
fn copy_map_insert(
@@ -1163,6 +1196,7 @@
) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
let node = Self::get_or_insert_node(
self.on_disk,
+ &mut self.unreachable_bytes,
&mut self.root,
&key,
WithBasename::to_cow_owned,
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list