D11099: dirstate-v2: Separate iterators for dirfoldmap and debugdirstate
SimonSapin
phabricator at mercurial-scm.org
Fri Jul 16 12:40:14 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
`dirstatemap.dirfoldmap` was recently changed to re-use a Rust iterator
that was added for the `hg debugdirstate` command.
That iterator was based on all nodes in the tree dirstate without an entry
only existing to hold child nodes, and therefore being directories.
However to optimize status further we may want to store additional nodes
for unknown or ignored files and directories. At that point the two users
of this iterator will want different things, so letâs make two iterators
instead.
See doc-comments in `dispatch.rs`.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11099
AFFECTED FILES
mercurial/debugcommands.py
mercurial/dirstatemap.py
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dispatch.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/dispatch.rs
tests/test-completion.t
tests/test-status.t
CHANGE DETAILS
diff --git a/tests/test-status.t b/tests/test-status.t
--- a/tests/test-status.t
+++ b/tests/test-status.t
@@ -929,23 +929,23 @@
The cached mtime is initially unset
- $ hg debugdirstate --dirs --no-dates | grep '^d'
- d 0 0 unset subdir
+ $ hg debugdirstate --all --no-dates | grep '^ '
+ 0 -1 unset subdir
It is still not set when there are unknown files
$ touch subdir/unknown
$ hg status
? subdir/unknown
- $ hg debugdirstate --dirs --no-dates | grep '^d'
- d 0 0 unset subdir
+ $ hg debugdirstate --all --no-dates | grep '^ '
+ 0 -1 unset subdir
Now the directory is eligible for caching, so its mtime is save in the dirstate
$ rm subdir/unknown
$ hg status
- $ hg debugdirstate --dirs --no-dates | grep '^d'
- d 0 0 set subdir
+ $ hg debugdirstate --all --no-dates | grep '^ '
+ 0 -1 set subdir
This time the command should be ever so slightly faster since it does not need `read_dir("subdir")`
@@ -963,11 +963,11 @@
Removing a node from the dirstate resets the cache for its parent directory
$ hg forget subdir/a
- $ hg debugdirstate --dirs --no-dates | grep '^d'
- d 0 0 set subdir
+ $ hg debugdirstate --all --no-dates | grep '^ '
+ 0 -1 set subdir
$ hg ci -qm '#1'
- $ hg debugdirstate --dirs --no-dates | grep '^d'
- d 0 0 unset subdir
+ $ hg debugdirstate --all --no-dates | grep '^ '
+ 0 -1 unset subdir
$ hg status
? subdir/a
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -284,7 +284,7 @@
debugdate: extended
debugdeltachain: changelog, manifest, dir, template
debugdirstateignorepatternshash:
- debugdirstate: nodates, dates, datesort, dirs
+ debugdirstate: nodates, dates, datesort, all
debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template
debugdownload: output
debugextensions: template
diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs
--- a/rust/hg-cpython/src/dirstate/dispatch.rs
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs
@@ -203,17 +203,30 @@
self.get().iter()
}
- fn iter_directories(
+ fn iter_tracked_dirs(
+ &mut self,
+ ) -> Result<
+ Box<
+ dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+ + Send
+ + '_,
+ >,
+ DirstateError,
+ > {
+ self.get_mut().iter_tracked_dirs()
+ }
+
+ fn debug_iter(
&self,
) -> Box<
dyn Iterator<
Item = Result<
- (&HgPath, Option<Timestamp>),
+ (&HgPath, (u8, i32, i32, i32)),
DirstateV2ParseError,
>,
> + Send
+ '_,
> {
- self.get().iter_directories()
+ self.get().debug_iter()
}
}
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -19,8 +19,8 @@
use crate::{
dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
- dirstate::make_directory_item,
dirstate::make_dirstate_item,
+ dirstate::make_dirstate_item_raw,
dirstate::non_normal_entries::{
NonNormalEntries, NonNormalEntriesIterator,
},
@@ -61,17 +61,14 @@
use_dirstate_tree: bool,
on_disk: PyBytes,
) -> PyResult<PyObject> {
- let dirstate_error = |e: DirstateError| {
- PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
- };
let (inner, parents) = if use_dirstate_tree {
let (map, parents) = OwningDirstateMap::new_v1(py, on_disk)
- .map_err(dirstate_error)?;
+ .map_err(|e| dirstate_error(py, e))?;
(Box::new(map) as _, parents)
} else {
let bytes = on_disk.data(py);
let mut map = RustDirstateMap::default();
- let parents = map.read(bytes).map_err(dirstate_error)?;
+ let parents = map.read(bytes).map_err(|e| dirstate_error(py, e))?;
(Box::new(map) as _, parents)
};
let map = Self::create_instance(py, inner)?;
@@ -550,19 +547,29 @@
)
}
- def directories(&self) -> PyResult<PyList> {
+ def tracked_dirs(&self) -> PyResult<PyList> {
let dirs = PyList::new(py, &[]);
- for item in self.inner(py).borrow().iter_directories() {
- let (path, mtime) = item.map_err(|e| v2_error(py, e))?;
+ for path in self.inner(py).borrow_mut().iter_tracked_dirs()
+ .map_err(|e |dirstate_error(py, e))?
+ {
+ let path = path.map_err(|e| v2_error(py, e))?;
let path = PyBytes::new(py, path.as_bytes());
- let mtime = mtime.map(|t| t.0).unwrap_or(-1);
- let item = make_directory_item(py, mtime as i32)?;
- let tuple = (path, item);
- dirs.append(py, tuple.to_py_object(py).into_object())
+ dirs.append(py, path.into_object())
}
Ok(dirs)
}
+ def debug_iter(&self) -> PyResult<PyList> {
+ let dirs = PyList::new(py, &[]);
+ for item in self.inner(py).borrow().debug_iter() {
+ let (path, (state, mode, size, mtime)) =
+ item.map_err(|e| v2_error(py, e))?;
+ let path = PyBytes::new(py, path.as_bytes());
+ let item = make_dirstate_item_raw(py, state, mode, size, mtime)?;
+ dirs.append(py, (path, item).to_py_object(py).into_object())
+ }
+ Ok(dirs)
+ }
});
impl DirstateMap {
@@ -616,3 +623,7 @@
pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
}
+
+fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
+ PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
+}
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -52,9 +52,6 @@
py: Python,
entry: &DirstateEntry,
) -> PyResult<PyObject> {
- // might be silly to retrieve capsule function in hot loop
- let make = make_dirstate_item_capi::retrieve(py)?;
-
let &DirstateEntry {
state,
mode,
@@ -65,22 +62,19 @@
// because Into<u8> has a specific implementation while `as c_char` would
// just do a naive enum cast.
let state_code: u8 = state.into();
-
- let maybe_obj = unsafe {
- let ptr = make(state_code as c_char, mode, size, mtime);
- PyObject::from_owned_ptr_opt(py, ptr)
- };
- maybe_obj.ok_or_else(|| PyErr::fetch(py))
+ make_dirstate_item_raw(py, state_code, mode, size, mtime)
}
-// XXX a bit strange to have a dedicated function, but directory are not
-// treated as dirstate node by hg-core for now soâ¦
-pub fn make_directory_item(py: Python, mtime: i32) -> PyResult<PyObject> {
- // might be silly to retrieve capsule function in hot loop
+pub fn make_dirstate_item_raw(
+ py: Python,
+ state: u8,
+ mode: i32,
+ size: i32,
+ mtime: i32,
+) -> PyResult<PyObject> {
let make = make_dirstate_item_capi::retrieve(py)?;
-
let maybe_obj = unsafe {
- let ptr = make(b'd' as c_char, 0 as i32, 0 as i32, mtime);
+ let ptr = make(state as c_char, mode, size, mtime);
PyObject::from_owned_ptr_opt(py, ptr)
};
maybe_obj.ok_or_else(|| PyErr::fetch(py))
diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs
@@ -259,20 +259,40 @@
/// are `Result`s.
fn iter(&self) -> StateMapIter<'_>;
- /// In the tree dirstate, return an iterator of "directory" (entry-less)
- /// nodes with the data stored for them. This is for `hg debugdirstate
- /// --dirs`.
+ /// Returns an iterator of tracked directories.
///
- /// In the flat dirstate, returns an empty iterator.
+ /// This is the paths for which `has_tracked_dir` would return true.
+ /// Or, in other words, the union of ancestor paths of all paths that have
+ /// an associated entry in a "tracked" state in this dirstate map.
///
/// Because parse errors can happen during iteration, the iterated items
/// are `Result`s.
- fn iter_directories(
+ fn iter_tracked_dirs(
+ &mut self,
+ ) -> Result<
+ Box<
+ dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+ + Send
+ + '_,
+ >,
+ DirstateError,
+ >;
+
+ /// Return an iterator of `(path, (state, mode, size, mtime))` for every
+ /// node stored in this dirstate map, for the purpose of the `hg
+ /// debugdirstate` command.
+ ///
+ /// For nodes that donât have an entry, `state` is the ASCII space.
+ /// An `mtime` may still be present. It is used to optimize `status`.
+ ///
+ /// Because parse errors can happen during iteration, the iterated items
+ /// are `Result`s.
+ fn debug_iter(
&self,
) -> Box<
dyn Iterator<
Item = Result<
- (&HgPath, Option<Timestamp>),
+ (&HgPath, (u8, i32, i32, i32)),
DirstateV2ParseError,
>,
> + Send
@@ -476,17 +496,41 @@
Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value))))
}
- fn iter_directories(
+ fn iter_tracked_dirs(
+ &mut self,
+ ) -> Result<
+ Box<
+ dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+ + Send
+ + '_,
+ >,
+ DirstateError,
+ > {
+ self.set_all_dirs()?;
+ Ok(Box::new(
+ self.all_dirs
+ .as_ref()
+ .unwrap()
+ .iter()
+ .map(|path| Ok(&**path)),
+ ))
+ }
+
+ fn debug_iter(
&self,
) -> Box<
dyn Iterator<
Item = Result<
- (&HgPath, Option<Timestamp>),
+ (&HgPath, (u8, i32, i32, i32)),
DirstateV2ParseError,
>,
> + Send
+ '_,
> {
- Box::new(std::iter::empty())
+ Box::new(
+ (&**self)
+ .iter()
+ .map(|(path, entry)| Ok((&**path, entry.debug_tuple()))),
+ )
}
}
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -1246,27 +1246,50 @@
}))
}
- fn iter_directories(
+ fn iter_tracked_dirs(
+ &mut self,
+ ) -> Result<
+ Box<
+ dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+ + Send
+ + '_,
+ >,
+ DirstateError,
+ > {
+ let on_disk = self.on_disk;
+ Ok(Box::new(filter_map_results(
+ self.iter_nodes(),
+ move |node| {
+ Ok(if node.tracked_descendants_count() > 0 {
+ Some(node.full_path(on_disk)?)
+ } else {
+ None
+ })
+ },
+ )))
+ }
+
+ fn debug_iter(
&self,
) -> Box<
dyn Iterator<
Item = Result<
- (&HgPath, Option<Timestamp>),
+ (&HgPath, (u8, i32, i32, i32)),
DirstateV2ParseError,
>,
> + Send
+ '_,
> {
- Box::new(filter_map_results(self.iter_nodes(), move |node| {
- Ok(if node.state()?.is_none() {
- Some((
- node.full_path(self.on_disk)?,
- node.cached_directory_mtime()
- .map(|mtime| Timestamp(mtime.seconds())),
- ))
+ Box::new(self.iter_nodes().map(move |node| {
+ let node = node?;
+ let debug_tuple = if let Some(entry) = node.entry()? {
+ entry.debug_tuple()
+ } else if let Some(mtime) = node.cached_directory_mtime() {
+ (b' ', 0, -1, mtime.seconds() as i32)
} else {
- None
- })
+ (b' ', 0, -1, -1)
+ };
+ Ok((node.full_path(self.on_disk)?, debug_tuple))
}))
}
}
diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs
--- a/rust/hg-core/src/dirstate.rs
+++ b/rust/hg-core/src/dirstate.rs
@@ -65,6 +65,12 @@
let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
dirstate_exec_bit != fs_exec_bit
}
+
+ /// Returns a `(state, mode, size, mtime)` tuple as for
+ /// `DirstateMapMethods::debug_iter`.
+ pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
+ (self.state.into(), self.mode, self.size, self.mtime)
+ }
}
#[derive(BytesCast)]
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -105,10 +105,6 @@
self._map
return self.copymap
- def directories(self):
- # Rust / dirstate-v2 only
- return []
-
def clear(self):
self._map.clear()
self.copymap.clear()
@@ -126,6 +122,8 @@
# forward for python2,3 compat
iteritems = items
+ debug_iter = items
+
def __len__(self):
return len(self._map)
@@ -527,6 +525,9 @@
def directories(self):
return self._rustmap.directories()
+ def debug_iter(self):
+ return self._rustmap.debug_iter()
+
def preload(self):
self._rustmap
@@ -737,6 +738,6 @@
def dirfoldmap(self):
f = {}
normcase = util.normcase
- for name, _pseudo_entry in self.directories():
+ for name in self._rustmap.tracked_dirs():
f[normcase(name)] = name
return f
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -942,7 +942,7 @@
),
(b'', b'dates', True, _(b'display the saved mtime')),
(b'', b'datesort', None, _(b'sort by saved mtime')),
- (b'', b'dirs', False, _(b'display directories')),
+ (b'', b'all', False, _(b'display dirstate-v2 tree nodes that would not exist in v1')),
],
_(b'[OPTION]...'),
)
@@ -961,9 +961,10 @@
) # sort by mtime, then by filename
else:
keyfunc = None # sort by filename
- entries = list(pycompat.iteritems(repo.dirstate))
- if opts['dirs']:
- entries.extend(repo.dirstate.directories())
+ if opts['all']:
+ entries = list(repo.dirstate._map.debug_iter())
+ else:
+ entries = list(pycompat.iteritems(repo.dirstate))
entries.sort(key=keyfunc)
for file_, ent in entries:
if ent.v1_mtime() == -1:
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list