[Request] [++-- ] D10824: dirstate-tree: Change status() results to not borrow DirstateMap
SimonSapin
phabricator at mercurial-scm.org
Tue Jun 1 16:52:33 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
The `status` function takes a `&'tree mut DirstateMap<'on_disk>` parameter.
`'on_disk` borrows a read-only byte buffer with the contents of the
`.hg/dirstate` file. `DirstateMap` internally represents file paths as
`std::borrow::Cow<'on_disk, HgPath>`, which borrows the byte buffer when
possible and allocates an owned string if not, such as for files added to the
dirstate after it was loaded from disk.
Previously the return type of `status` had a `'tree` lifetime, meaning it
could borrow all paths from the `DirstateMap`. With this changeset, that
lifetime is changed to `'on_disk` meaning that only paths from the byte buffer
can be borrowed, and paths allocated by `DirstateMap` must be copied.
Usually most paths are in the byte buffer, and most paths are not part of the
return value of `status`, so the number of extra copies should be small.
This change will enable `status` to mutate the `DirstateMap` after it has
finished constructing its return value. Previously such mutation would be
prevented by possible on-going borrows.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10824
AFFECTED FILES
rust/hg-core/src/dirstate/status.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/status.rs
rust/hg-core/src/operations/dirstate_status.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/operations/dirstate_status.rs b/rust/hg-core/src/operations/dirstate_status.rs
--- a/rust/hg-core/src/operations/dirstate_status.rs
+++ b/rust/hg-core/src/operations/dirstate_status.rs
@@ -61,7 +61,10 @@
}
drop(traversed_sender);
- let traversed = traversed_receiver.into_iter().collect();
+ let traversed = traversed_receiver
+ .into_iter()
+ .map(std::borrow::Cow::Owned)
+ .collect();
Ok(build_response(results, traversed))
}
diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -1,4 +1,5 @@
use crate::dirstate::status::IgnoreFnType;
+use crate::dirstate_tree::dirstate_map::BorrowedPath;
use crate::dirstate_tree::dirstate_map::ChildNodesRef;
use crate::dirstate_tree::dirstate_map::DirstateMap;
use crate::dirstate_tree::dirstate_map::NodeRef;
@@ -17,7 +18,6 @@
use crate::StatusOptions;
use micro_timer::timed;
use rayon::prelude::*;
-use std::borrow::Cow;
use std::io;
use std::path::Path;
use std::path::PathBuf;
@@ -39,7 +39,7 @@
root_dir: PathBuf,
ignore_files: Vec<PathBuf>,
options: StatusOptions,
-) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
+) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
let (ignore_fn, warnings): (IgnoreFnType, _) =
if options.list_ignored || options.list_unknown {
get_ignore_function(ignore_files, &root_dir)?
@@ -55,7 +55,7 @@
outcome: Mutex::new(DirstateStatus::default()),
};
let is_at_repo_root = true;
- let hg_path = HgPath::new("");
+ let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
let has_ignored_ancestor = false;
common.traverse_fs_directory_and_dirstate(
has_ignored_ancestor,
@@ -69,15 +69,15 @@
/// Bag of random things needed by various parts of the algorithm. Reduces the
/// number of parameters passed to functions.
-struct StatusCommon<'tree, 'a, 'on_disk: 'tree> {
+struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
dmap: &'tree DirstateMap<'on_disk>,
options: StatusOptions,
matcher: &'a (dyn Matcher + Sync),
ignore_fn: IgnoreFnType<'a>,
- outcome: Mutex<DirstateStatus<'tree>>,
+ outcome: Mutex<DirstateStatus<'on_disk>>,
}
-impl<'tree, 'a> StatusCommon<'tree, 'a, '_> {
+impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
fn read_dir(
&self,
hg_path: &HgPath,
@@ -100,8 +100,8 @@
fn traverse_fs_directory_and_dirstate(
&self,
has_ignored_ancestor: bool,
- dirstate_nodes: ChildNodesRef<'tree, '_>,
- directory_hg_path: &'tree HgPath,
+ dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
+ directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
directory_fs_path: &Path,
is_at_repo_root: bool,
) -> Result<(), DirstateV2ParseError> {
@@ -199,10 +199,10 @@
&self,
fs_path: &Path,
fs_metadata: &std::fs::Metadata,
- dirstate_node: NodeRef<'tree, '_>,
+ dirstate_node: NodeRef<'tree, 'on_disk>,
has_ignored_ancestor: bool,
) -> Result<(), DirstateV2ParseError> {
- let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
+ let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let file_type = fs_metadata.file_type();
let file_or_symlink = file_type.is_file() || file_type.is_symlink();
if !file_or_symlink {
@@ -210,13 +210,17 @@
// `hg rm` or similar) or deleted before it could be
// replaced by a directory or something else.
self.mark_removed_or_deleted_if_file(
- hg_path,
+ &hg_path,
dirstate_node.state()?,
);
}
if file_type.is_dir() {
if self.options.collect_traversed_dirs {
- self.outcome.lock().unwrap().traversed.push(hg_path.into())
+ self.outcome
+ .lock()
+ .unwrap()
+ .traversed
+ .push(hg_path.detach_from_tree())
}
let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
let is_at_repo_root = false;
@@ -229,24 +233,26 @@
)?
} else {
if file_or_symlink && self.matcher.matches(hg_path) {
- let full_path = Cow::from(hg_path);
if let Some(state) = dirstate_node.state()? {
match state {
- EntryState::Added => {
- self.outcome.lock().unwrap().added.push(full_path)
- }
+ EntryState::Added => self
+ .outcome
+ .lock()
+ .unwrap()
+ .added
+ .push(hg_path.detach_from_tree()),
EntryState::Removed => self
.outcome
.lock()
.unwrap()
.removed
- .push(full_path),
+ .push(hg_path.detach_from_tree()),
EntryState::Merged => self
.outcome
.lock()
.unwrap()
.modified
- .push(full_path),
+ .push(hg_path.detach_from_tree()),
EntryState::Normal => self
.handle_normal_file(&dirstate_node, fs_metadata)?,
// This variant is not used in DirstateMap
@@ -256,10 +262,7 @@
} else {
// `node.entry.is_none()` indicates a "directory"
// node, but the filesystem has a file
- self.mark_unknown_or_ignored(
- has_ignored_ancestor,
- full_path,
- )
+ self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path)
}
}
@@ -275,7 +278,7 @@
/// filesystem
fn handle_normal_file(
&self,
- dirstate_node: &NodeRef<'tree, '_>,
+ dirstate_node: &NodeRef<'tree, 'on_disk>,
fs_metadata: &std::fs::Metadata,
) -> Result<(), DirstateV2ParseError> {
// Keep the low 31 bits
@@ -289,7 +292,7 @@
let entry = dirstate_node
.entry()?
.expect("handle_normal_file called with entry-less node");
- let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
+ let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let mode_changed =
|| self.options.check_exec && entry.mode_changed(fs_metadata);
let size_changed = entry.size != truncate_u64(fs_metadata.len());
@@ -299,20 +302,36 @@
{
// issue6456: Size returned may be longer due to encryption
// on EXT-4 fscrypt. TODO maybe only do it on EXT4?
- self.outcome.lock().unwrap().unsure.push(full_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .unsure
+ .push(hg_path.detach_from_tree())
} else if dirstate_node.has_copy_source()
|| entry.is_from_other_parent()
|| (entry.size >= 0 && (size_changed || mode_changed()))
{
- self.outcome.lock().unwrap().modified.push(full_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .modified
+ .push(hg_path.detach_from_tree())
} else {
let mtime = mtime_seconds(fs_metadata);
if truncate_i64(mtime) != entry.mtime
|| mtime == self.options.last_normal_time
{
- self.outcome.lock().unwrap().unsure.push(full_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .unsure
+ .push(hg_path.detach_from_tree())
} else if self.options.list_clean {
- self.outcome.lock().unwrap().clean.push(full_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .clean
+ .push(hg_path.detach_from_tree())
}
}
Ok(())
@@ -321,10 +340,10 @@
/// A node in the dirstate tree has no corresponding filesystem entry
fn traverse_dirstate_only(
&self,
- dirstate_node: NodeRef<'tree, '_>,
+ dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
self.mark_removed_or_deleted_if_file(
- dirstate_node.full_path(self.dmap.on_disk)?,
+ &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
dirstate_node.state()?,
);
dirstate_node
@@ -340,15 +359,23 @@
/// Does nothing on a "directory" node
fn mark_removed_or_deleted_if_file(
&self,
- hg_path: &'tree HgPath,
+ hg_path: &BorrowedPath<'tree, 'on_disk>,
dirstate_node_state: Option<EntryState>,
) {
if let Some(state) = dirstate_node_state {
if self.matcher.matches(hg_path) {
if let EntryState::Removed = state {
- self.outcome.lock().unwrap().removed.push(hg_path.into())
+ self.outcome
+ .lock()
+ .unwrap()
+ .removed
+ .push(hg_path.detach_from_tree())
} else {
- self.outcome.lock().unwrap().deleted.push(hg_path.into())
+ self.outcome
+ .lock()
+ .unwrap()
+ .deleted
+ .push(hg_path.detach_from_tree())
}
}
}
@@ -395,23 +422,34 @@
self.outcome.lock().unwrap().traversed.push(hg_path.into())
}
} else if file_or_symlink && self.matcher.matches(&hg_path) {
- self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
+ self.mark_unknown_or_ignored(
+ has_ignored_ancestor,
+ &BorrowedPath::InMemory(&hg_path),
+ )
}
}
fn mark_unknown_or_ignored(
&self,
has_ignored_ancestor: bool,
- hg_path: Cow<'tree, HgPath>,
+ hg_path: &BorrowedPath<'_, 'on_disk>,
) {
let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
if is_ignored {
if self.options.list_ignored {
- self.outcome.lock().unwrap().ignored.push(hg_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .ignored
+ .push(hg_path.detach_from_tree())
}
} else {
if self.options.list_unknown {
- self.outcome.lock().unwrap().unknown.push(hg_path)
+ self.outcome
+ .lock()
+ .unwrap()
+ .unknown
+ .push(hg_path.detach_from_tree())
}
}
}
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -46,6 +46,13 @@
/// string prefix.
pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
+/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
+/// for on-disk nodes that don’t actually have a `Cow` to borrow.
+pub(super) enum BorrowedPath<'tree, 'on_disk> {
+ InMemory(&'tree HgPathBuf),
+ OnDisk(&'on_disk HgPath),
+}
+
pub(super) enum ChildNodes<'on_disk> {
InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
OnDisk(&'on_disk [on_disk::Node]),
@@ -61,6 +68,26 @@
OnDisk(&'on_disk on_disk::Node),
}
+impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
+ pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
+ match *self {
+ BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
+ BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
+ }
+ }
+}
+
+impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
+ type Target = HgPath;
+
+ fn deref(&self) -> &HgPath {
+ match *self {
+ BorrowedPath::InMemory(in_memory) => in_memory,
+ BorrowedPath::OnDisk(on_disk) => on_disk,
+ }
+ }
+}
+
impl Default for ChildNodes<'_> {
fn default() -> Self {
ChildNodes::InMemory(Default::default())
@@ -210,15 +237,19 @@
}
}
- /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
- pub(super) fn full_path_cow(
+ /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
+ /// HgPath>` detached from `'tree`
+ pub(super) fn full_path_borrowed(
&self,
on_disk: &'on_disk [u8],
- ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
+ ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
match self {
- NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
+ NodeRef::InMemory(path, _node) => match path.full_path() {
+ Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
+ Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
+ },
NodeRef::OnDisk(node) => {
- Ok(Cow::Borrowed(node.full_path(on_disk)?))
+ Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
}
}
}
@@ -819,7 +850,10 @@
node.copy_source(self.on_disk)?,
);
if entry.mtime_is_ambiguous(now) {
- ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
+ ambiguous_mtimes.push(
+ node.full_path_borrowed(self.on_disk)?
+ .detach_from_tree(),
+ )
}
}
}
@@ -855,7 +889,10 @@
let node = node?;
if let Some(entry) = node.entry()? {
if entry.mtime_is_ambiguous(now) {
- paths.push(node.full_path_cow(self.on_disk)?)
+ paths.push(
+ node.full_path_borrowed(self.on_disk)?
+ .detach_from_tree(),
+ )
}
}
}
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -292,7 +292,7 @@
pub unsure: Vec<HgPathCow<'a>>,
/// Only filled if `collect_traversed_dirs` is `true`
- pub traversed: Vec<HgPathBuf>,
+ pub traversed: Vec<HgPathCow<'a>>,
}
#[derive(Debug, derive_more::From)]
@@ -880,7 +880,7 @@
#[timed]
pub fn build_response<'a>(
results: impl IntoIterator<Item = DispatchedPath<'a>>,
- traversed: Vec<HgPathBuf>,
+ traversed: Vec<HgPathCow<'a>>,
) -> DirstateStatus<'a> {
let mut unsure = vec![];
let mut modified = vec![];
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20210601/5ff0b3f2/attachment-0001.html>
More information about the Mercurial-patches
mailing list