[Request] [++-- ] D11731: rhg: lazily get filesystem metadata
aalekseyev (Arseniy Alekseyev)
phabricator at mercurial-scm.org
Tue Nov 2 12:06:34 UTC 2021
aalekseyev created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
rhg: use openat library for more efficient filesystem access
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11731
AFFECTED FILES
rust/Cargo.lock
rust/hg-core/Cargo.toml
rust/hg-core/src/dirstate/entry.rs
rust/hg-core/src/dirstate_tree/status.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -8,13 +8,10 @@
use crate::dirstate_tree::on_disk::DirstateV2ParseError;
use crate::matchers::get_ignore_function;
use crate::matchers::Matcher;
-use crate::utils::files::get_bytes_from_os_string;
-use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::HgPath;
use crate::BadMatch;
use crate::DirstateStatus;
use crate::EntryState;
-use crate::HgPathBuf;
use crate::PatternFileWarning;
use crate::StatusError;
use crate::StatusOptions;
@@ -27,6 +24,11 @@
use std::path::PathBuf;
use std::sync::Mutex;
use std::time::SystemTime;
+use openat::{Dir};
+use std::ffi::CStr;
+use std::os::unix::ffi::OsStrExt;
+use std::ffi::CString;
+use std::convert::TryInto;
/// Returns the status of the working directory compared to its parent
/// changeset.
@@ -80,6 +82,7 @@
// If the path we have for the repository root is a symlink, do follow it.
// (As opposed to symlinks within the working directory which are not
// followed, using `std::fs::symlink_metadata`.)
+ let root_dir = openat::Dir::open(&root_dir)?;
common.traverse_fs_directory_and_dirstate(
has_ignored_ancestor,
dmap.root.as_ref(),
@@ -141,14 +144,94 @@
filesystem_time_at_status_start: Option<SystemTime>,
}
+enum MetadataOrType<'a> {
+ Metadata(&'a openat::Metadata),
+ FileType(openat::SimpleType)
+}
+
+struct FilesystemEntry<'a> {
+ dir : &'a Dir,
+ name : &'a CStr,
+ metadata_or_type : MetadataOrType<'a>
+}
+
+enum TORREF <'a, T> {
+ Owned(T),
+ Borrowed(&'a T)
+}
+
+impl <'a, T> TORREF <'a, T> {
+ fn borrow(&self) -> &T {
+ match self {
+ TORREF::Owned(t) => &t,
+ TORREF::Borrowed(r) => r
+ }
+ }
+}
+
+impl <'a> FilesystemEntry<'a> {
+
+ fn of_read_dir(dir : &'a Dir, entry : &'a openat::Entry) -> Self {
+ FilesystemEntry {
+ dir, name : <& openat::Entry as openat::AsPath>::to_path(entry).unwrap(), metadata_or_type : MetadataOrType::FileType(entry.simple_type().unwrap()) // TODO: not unwrap
+ }
+ }
+
+ fn of_metadata(dir : &'a Dir, name : &'a CStr, metadata : &'a openat::Metadata) -> Self {
+ FilesystemEntry {
+ dir, name, metadata_or_type : MetadataOrType::Metadata(metadata)
+ }
+ }
+
+ fn file_type(&self) -> openat::SimpleType {
+ match self.metadata_or_type {
+ MetadataOrType::Metadata(metadata) => metadata.simple_type (),
+ MetadataOrType::FileType(file_type) => file_type
+ }
+ }
+
+ // Returns None if the entry is not a directory
+ fn as_directory(&self) -> io::Result<Option<(Dir, openat::Metadata)>> {
+ match self.file_type() {
+ openat::SimpleType::Dir => {
+ let dir = self.dir.sub_dir(self.name)?;
+ let metadata = dir.self_metadata()?;
+ Ok(Some((dir, metadata)))
+ },
+ _ => Ok(None)
+ }
+ }
+
+ fn metadata(&self) -> io::Result<TORREF<'a, openat::Metadata>> {
+ match self.metadata_or_type {
+ MetadataOrType::Metadata(m) => Ok(TORREF::Borrowed(m)),
+ MetadataOrType::FileType(_) => {
+ let metadata = self.dir.metadata(self.name)?;
+ Ok(TORREF::Owned(metadata)) }
+ }
+ }
+}
+
+ fn stat_mtime(metadata : &openat::Metadata) -> SystemTime {
+ let stat = metadata.stat();
+ let nsec : i64 = stat.st_mtime_nsec as libc::c_long;
+ let nsec : u32 = nsec.try_into().unwrap();
+ let sec : i64 = stat.st_mtime as libc::time_t;
+ let sec : u64 = sec.try_into().unwrap();
+ let dur = std::time::Duration::new(sec, nsec);
+ SystemTime::UNIX_EPOCH.checked_add(dur).unwrap()
+ }
+
+
+
impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
fn read_dir(
&self,
hg_path: &HgPath,
- fs_path: &Path,
+ fs_dir: &Dir,
is_at_repo_root: bool,
- ) -> Result<Vec<DirEntry>, ()> {
- DirEntry::read_dir(fs_path, is_at_repo_root)
+ ) -> Result<Vec<openat::Entry>, ()> {
+ read_dir(fs_dir, is_at_repo_root)
.map_err(|error| self.io_error(error, hg_path))
}
@@ -182,7 +265,7 @@
/// need to call `read_dir`.
fn can_skip_fs_readdir(
&self,
- directory_metadata: Option<&std::fs::Metadata>,
+ directory_metadata: Option<&openat::Metadata>,
cached_directory_mtime: Option<TruncatedTimestamp>,
) -> bool {
if !self.options.list_unknown && !self.options.list_ignored {
@@ -221,8 +304,8 @@
has_ignored_ancestor: bool,
dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
- directory_fs_path: &Path,
- directory_metadata: Option<&std::fs::Metadata>,
+ directory_fd: &Dir,
+ directory_metadata: Option<&openat::Metadata>,
cached_directory_mtime: Option<TruncatedTimestamp>,
is_at_repo_root: bool,
) -> Result<bool, DirstateV2ParseError> {
@@ -231,13 +314,11 @@
dirstate_nodes
.par_iter()
.map(|dirstate_node| {
- let fs_path = directory_fs_path.join(get_path_from_bytes(
- dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
- ));
- match std::fs::symlink_metadata(&fs_path) {
+ let base_name = dirstate_node.base_name(self.dmap.on_disk)?;
+ let base_name_cstring = CString::new(base_name.as_bytes()).expect("NULL BYTE");
+ match directory_fd.metadata(base_name_cstring.as_ref()) {
Ok(fs_metadata) => self.traverse_fs_and_dirstate(
- &fs_path,
- &fs_metadata,
+ &FilesystemEntry::of_metadata(directory_fd, base_name_cstring.as_ref(), &fs_metadata),
dirstate_node,
has_ignored_ancestor,
),
@@ -261,7 +342,7 @@
let mut fs_entries = if let Ok(entries) = self.read_dir(
directory_hg_path,
- directory_fs_path,
+ directory_fd,
is_at_repo_root,
) {
entries
@@ -277,7 +358,7 @@
let dirstate_nodes = dirstate_nodes.sorted();
// `sort_unstable_by_key` doesnât allow keys borrowing from the value:
// https://github.com/rust-lang/rust/issues/34162
- fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
+ fs_entries.sort_unstable_by(|e1, e2| e1.file_name().as_bytes().cmp(&e2.file_name().as_bytes()));
// Propagate here any error that would happen inside the comparison
// callback below
@@ -293,7 +374,8 @@
dirstate_node
.base_name(self.dmap.on_disk)
.unwrap()
- .cmp(&fs_entry.base_name)
+ .as_bytes()
+ .cmp(&fs_entry.file_name().as_bytes())
},
)
.par_bridge()
@@ -303,8 +385,7 @@
match pair {
Both(dirstate_node, fs_entry) => {
self.traverse_fs_and_dirstate(
- &fs_entry.full_path,
- &fs_entry.metadata,
+ &FilesystemEntry::of_read_dir(directory_fd, &fs_entry),
dirstate_node,
has_ignored_ancestor,
)?;
@@ -318,6 +399,7 @@
has_dirstate_node_or_is_ignored = self.traverse_fs_only(
has_ignored_ancestor,
directory_hg_path,
+ directory_fd,
fs_entry,
)
}
@@ -329,15 +411,18 @@
fn traverse_fs_and_dirstate(
&self,
- fs_path: &Path,
- fs_metadata: &std::fs::Metadata,
+ fs_entry: &FilesystemEntry,
dirstate_node: NodeRef<'tree, 'on_disk>,
has_ignored_ancestor: bool,
) -> Result<(), DirstateV2ParseError> {
self.check_for_outdated_directory_cache(&dirstate_node)?;
let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
- let file_type = fs_metadata.file_type();
- let file_or_symlink = file_type.is_file() || file_type.is_symlink();
+ let file_type = fs_entry.file_type();
+ let file_or_symlink =
+ match file_type {
+ openat::SimpleType::File | openat::SimpleType::Symlink => true,
+ openat::SimpleType::Dir | openat::SimpleType::Other => false,
+ };
if !file_or_symlink {
// If we previously had a file here, it was removed (with
// `hg rm` or similar) or deleted before it could be
@@ -347,7 +432,8 @@
dirstate_node.state()?,
);
}
- if file_type.is_dir() {
+ match fs_entry.as_directory().expect("TODO: handle") {
+ Some ((dir, fs_metadata)) => {
if self.options.collect_traversed_dirs {
self.outcome
.lock()
@@ -362,17 +448,18 @@
is_ignored,
dirstate_node.children(self.dmap.on_disk)?,
hg_path,
- fs_path,
- Some(fs_metadata),
+ &dir,
+ Some(&fs_metadata),
dirstate_node.cached_directory_mtime()?,
is_at_repo_root,
)?;
self.maybe_save_directory_mtime(
children_all_have_dirstate_node_or_are_ignored,
- fs_metadata,
+ &fs_metadata,
dirstate_node,
)?
- } else {
+ },
+ None => {
if file_or_symlink && self.matcher.matches(hg_path) {
if let Some(state) = dirstate_node.state()? {
match state {
@@ -394,8 +481,13 @@
.unwrap()
.modified
.push(hg_path.detach_from_tree()),
- EntryState::Normal => self
- .handle_normal_file(&dirstate_node, fs_metadata)?,
+ EntryState::Normal => {
+ let fs_metadata = fs_entry.metadata().expect("TODO");
+ self.handle_normal_file(
+ &dirstate_node,
+ fs_metadata.borrow(),
+ )?
+ }
}
} else {
// `node.entry.is_none()` indicates a "directory"
@@ -411,23 +503,25 @@
{
self.traverse_dirstate_only(child_node)?
}
- }
+ },
+ };
Ok(())
}
+
fn maybe_save_directory_mtime(
&self,
children_all_have_dirstate_node_or_are_ignored: bool,
- directory_metadata: &std::fs::Metadata,
+ directory_metadata: &openat::Metadata,
dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
if children_all_have_dirstate_node_or_are_ignored {
// All filesystem directory entries from `read_dir` have a
// corresponding node in the dirstate, so we can reconstitute the
// names of those entries without calling `read_dir` again.
- if let (Some(status_start), Ok(directory_mtime)) = (
+ if let (Some(status_start), directory_mtime) = (
&self.filesystem_time_at_status_start,
- directory_metadata.modified(),
+ stat_mtime(directory_metadata),
) {
// Although the Rust standard libraryâs `SystemTime` type
// has nanosecond precision, the times reported for a
@@ -495,7 +589,7 @@
fn handle_normal_file(
&self,
dirstate_node: &NodeRef<'tree, 'on_disk>,
- fs_metadata: &std::fs::Metadata,
+ fs_metadata: &openat::Metadata,
) -> Result<(), DirstateV2ParseError> {
// Keep the low 31 bits
fn truncate_u64(value: u64) -> i32 {
@@ -510,7 +604,7 @@
|| self.options.check_exec && entry.mode_changed(fs_metadata);
let size = entry.size();
let size_changed = size != truncate_u64(fs_metadata.len());
- if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
+ if size >= 0 && size_changed && fs_metadata.simple_type() == openat::SimpleType::Symlink {
// issue6456: Size returned may be longer due to encryption
// on EXT-4 fscrypt. TODO maybe only do it on EXT4?
self.outcome
@@ -607,12 +701,15 @@
&self,
has_ignored_ancestor: bool,
directory_hg_path: &HgPath,
- fs_entry: &DirEntry,
+ parent_dir : &Dir,
+ fs_entry: &openat::Entry,
) -> bool {
- let hg_path = directory_hg_path.join(&fs_entry.base_name);
- let file_type = fs_entry.metadata.file_type();
- let file_or_symlink = file_type.is_file() || file_type.is_symlink();
- if file_type.is_dir() {
+ let hg_path = directory_hg_path.join(HgPath::new(&fs_entry.file_name ().as_bytes()));
+ let file_type = fs_entry.simple_type().unwrap(); // TODO: fix unwrap
+ let file_or_symlink = match file_type {
+ openat::SimpleType::File | openat::SimpleType::Symlink => true,
+ _ => false };
+ if openat::SimpleType::Dir == file_type {
let is_ignored =
has_ignored_ancestor || (self.ignore_fn)(&hg_path);
let traverse_children = if is_ignored {
@@ -625,15 +722,17 @@
};
if traverse_children {
let is_at_repo_root = false;
+ let dir = parent_dir.sub_dir(fs_entry.file_name()).expect("TODO");
if let Ok(children_fs_entries) = self.read_dir(
&hg_path,
- &fs_entry.full_path,
+ &dir,
is_at_repo_root,
) {
children_fs_entries.par_iter().for_each(|child_fs_entry| {
self.traverse_fs_only(
is_ignored,
&hg_path,
+ &dir,
child_fs_entry,
);
})
@@ -694,46 +793,34 @@
}
}
-struct DirEntry {
- base_name: HgPathBuf,
- full_path: PathBuf,
- metadata: std::fs::Metadata,
-}
-
-impl DirEntry {
- /// Returns **unsorted** entries in the given directory, with name and
- /// metadata.
- ///
- /// If a `.hg` sub-directory is encountered:
- ///
- /// * At the repository root, ignore that sub-directory
- /// * Elsewhere, weâre listing the content of a sub-repo. Return an empty
- /// list instead.
- fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
- let mut results = Vec::new();
- for entry in path.read_dir()? {
- let entry = entry?;
- let metadata = entry.metadata()?;
- let name = get_bytes_from_os_string(entry.file_name());
- // FIXME don't do this when cached
- if name == b".hg" {
- if is_at_repo_root {
- // Skip the repoâs own .hg (might be a symlink)
- continue;
- } else if metadata.is_dir() {
- // A .hg sub-directory at another location means a subrepo,
- // skip it entirely.
- return Ok(Vec::new());
- }
+/// Returns **unsorted** entries in the given directory, with name and
+/// metadata.
+///
+/// If a `.hg` sub-directory is encountered:
+///
+/// * At the repository root, ignore that sub-directory
+/// * Elsewhere, weâre listing the content of a sub-repo. Return an empty
+/// list instead.
+fn read_dir(dir: &openat::Dir, is_at_repo_root: bool) -> io::Result<Vec<openat::Entry>> {
+ let mut results = Vec::new();
+ for entry in dir.list_dir(".")? {
+ let entry = entry?;
+ let name = entry.file_name();
+ // FIXME don't do this when cached
+ if name.as_bytes() == b".hg" {
+ if is_at_repo_root {
+ // Skip the repoâs own .hg (might be a symlink)
+ continue;
+ // TODO: handle simple_type returning None
+ } else if Some(openat::SimpleType::Dir) == entry.simple_type() {
+ // A .hg sub-directory at another location means a subrepo,
+ // skip it entirely.
+ return Ok(Vec::new());
}
- results.push(DirEntry {
- base_name: name.into(),
- full_path: entry.path(),
- metadata,
- })
}
- Ok(results)
+ results.push(entry)
}
+ Ok(results)
}
/// Return the `mtime` of a temporary file newly-created in the `.hg` directory
diff --git a/rust/hg-core/src/dirstate/entry.rs b/rust/hg-core/src/dirstate/entry.rs
--- a/rust/hg-core/src/dirstate/entry.rs
+++ b/rust/hg-core/src/dirstate/entry.rs
@@ -2,7 +2,6 @@
use crate::errors::HgError;
use bitflags::bitflags;
use std::convert::{TryFrom, TryInto};
-use std::fs;
use std::io;
use std::time::{SystemTime, UNIX_EPOCH};
@@ -76,18 +75,18 @@
}
}
- pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
+ pub fn for_mtime_of(metadata: &openat::Metadata) -> io::Result<Self> {
#[cfg(unix)]
{
- use std::os::unix::fs::MetadataExt;
- let seconds = metadata.mtime();
- // i64Â -> u32 with value always in the `0 .. NSEC_PER_SEC` range
- let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
+ let stat = metadata.stat();
+ let seconds = stat.st_mtime as libc::time_t;
+ let nanoseconds = stat.st_mtime_nsec as libc::c_long;
+ let nanoseconds : u32 = nanoseconds.try_into().unwrap();
Ok(Self::new_truncate(seconds, nanoseconds))
}
#[cfg(not(unix))]
{
- metadata.modified().map(Self::from)
+ panic!("not supported")
}
}
@@ -130,10 +129,11 @@
pub fn likely_equal_to_mtime_of(
self,
- metadata: &fs::Metadata,
+ metadata: &openat::Metadata,
) -> io::Result<bool> {
Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
}
+
}
impl From<SystemTime> for TruncatedTimestamp {
@@ -578,12 +578,11 @@
#[cfg(unix)]
pub fn mode_changed(
&self,
- filesystem_metadata: &std::fs::Metadata,
+ filesystem_metadata: &openat::Metadata,
) -> bool {
- use std::os::unix::fs::MetadataExt;
const EXEC_BIT_MASK: u32 = 0o100;
let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK;
- let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
+ let fs_exec_bit = filesystem_metadata.stat().st_mode & EXEC_BIT_MASK;
dirstate_exec_bit != fs_exec_bit
}
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -34,6 +34,7 @@
memmap2 = {version = "0.4", features = ["stable_deref_trait"]}
zstd = "0.5.3"
format-bytes = "0.2.2"
+openat = "0.1.21"
# We don't use the `miniz-oxide` backend to not change rhg benchmarks and until
# we have a clearer view of which backend is the fastest.
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
+version = 3
+
[[package]]
name = "adler"
version = "0.2.3"
@@ -390,6 +392,7 @@
"log",
"memmap2",
"micro-timer",
+ "openat",
"pretty_assertions",
"rand",
"rand_distr",
@@ -598,6 +601,15 @@
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
+name = "openat"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95aa7c05907b3ebde2610d602f4ddd992145cc6a84493647c30396f30ba83abe"
+dependencies = [
+ "libc",
+]
+
+[[package]]
name = "output_vt100"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
To: aalekseyev, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20211102/e5361dda/attachment-0001.html>
More information about the Mercurial-patches
mailing list