D11099: dirstate-v2: Separate iterators for dirfoldmap and debugdirstate

SimonSapin phabricator at mercurial-scm.org
Fri Jul 16 12:40:14 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  `dirstatemap.dirfoldmap` was recently changed to re-use a Rust iterator
  that was added for the `hg debugdirstate` command.
  
  That iterator was based on all nodes in the tree dirstate without an entry
  only existing to hold child nodes, and therefore being directories.
  However to optimize status further we may want to store additional nodes
  for unknown or ignored files and directories. At that point the two users
  of this iterator will want different things, so let’s make two iterators
  instead.
  
  See doc-comments in `dispatch.rs`.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11099

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/dirstatemap.py
  rust/hg-core/src/dirstate.rs
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/dispatch.rs
  rust/hg-cpython/src/dirstate.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/dispatch.rs
  tests/test-completion.t
  tests/test-status.t

CHANGE DETAILS

diff --git a/tests/test-status.t b/tests/test-status.t
--- a/tests/test-status.t
+++ b/tests/test-status.t
@@ -929,23 +929,23 @@
 
 The cached mtime is initially unset
 
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
 
 It is still not set when there are unknown files
 
   $ touch subdir/unknown
   $ hg status
   ? subdir/unknown
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
 
 Now the directory is eligible for caching, so its mtime is save in the dirstate
 
   $ rm subdir/unknown
   $ hg status
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 set                 subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 set                 subdir
 
 This time the command should be ever so slightly faster since it does not need `read_dir("subdir")`
 
@@ -963,11 +963,11 @@
 Removing a node from the dirstate resets the cache for its parent directory
 
   $ hg forget subdir/a
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 set                 subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 set                 subdir
   $ hg ci -qm '#1'
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
   $ hg status
   ? subdir/a
 
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -284,7 +284,7 @@
   debugdate: extended
   debugdeltachain: changelog, manifest, dir, template
   debugdirstateignorepatternshash: 
-  debugdirstate: nodates, dates, datesort, dirs
+  debugdirstate: nodates, dates, datesort, all
   debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template
   debugdownload: output
   debugextensions: template
diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs
--- a/rust/hg-cpython/src/dirstate/dispatch.rs
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs
@@ -203,17 +203,30 @@
         self.get().iter()
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        self.get_mut().iter_tracked_dirs()
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        self.get().iter_directories()
+        self.get().debug_iter()
     }
 }
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -19,8 +19,8 @@
 
 use crate::{
     dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
-    dirstate::make_directory_item,
     dirstate::make_dirstate_item,
+    dirstate::make_dirstate_item_raw,
     dirstate::non_normal_entries::{
         NonNormalEntries, NonNormalEntriesIterator,
     },
@@ -61,17 +61,14 @@
         use_dirstate_tree: bool,
         on_disk: PyBytes,
     ) -> PyResult<PyObject> {
-        let dirstate_error = |e: DirstateError| {
-            PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
-        };
         let (inner, parents) = if use_dirstate_tree {
             let (map, parents) = OwningDirstateMap::new_v1(py, on_disk)
-                .map_err(dirstate_error)?;
+                .map_err(|e| dirstate_error(py, e))?;
             (Box::new(map) as _, parents)
         } else {
             let bytes = on_disk.data(py);
             let mut map = RustDirstateMap::default();
-            let parents = map.read(bytes).map_err(dirstate_error)?;
+            let parents = map.read(bytes).map_err(|e| dirstate_error(py, e))?;
             (Box::new(map) as _, parents)
         };
         let map = Self::create_instance(py, inner)?;
@@ -550,19 +547,29 @@
         )
     }
 
-    def directories(&self) -> PyResult<PyList> {
+    def tracked_dirs(&self) -> PyResult<PyList> {
         let dirs = PyList::new(py, &[]);
-        for item in self.inner(py).borrow().iter_directories() {
-            let (path, mtime) = item.map_err(|e| v2_error(py, e))?;
+        for path in self.inner(py).borrow_mut().iter_tracked_dirs()
+            .map_err(|e |dirstate_error(py, e))?
+        {
+            let path = path.map_err(|e| v2_error(py, e))?;
             let path = PyBytes::new(py, path.as_bytes());
-            let mtime = mtime.map(|t| t.0).unwrap_or(-1);
-            let item = make_directory_item(py, mtime as i32)?;
-            let tuple = (path, item);
-            dirs.append(py, tuple.to_py_object(py).into_object())
+            dirs.append(py, path.into_object())
         }
         Ok(dirs)
     }
 
+    def debug_iter(&self) -> PyResult<PyList> {
+        let dirs = PyList::new(py, &[]);
+        for item in self.inner(py).borrow().debug_iter() {
+            let (path, (state, mode, size, mtime)) =
+                item.map_err(|e| v2_error(py, e))?;
+            let path = PyBytes::new(py, path.as_bytes());
+            let item = make_dirstate_item_raw(py, state, mode, size, mtime)?;
+            dirs.append(py, (path, item).to_py_object(py).into_object())
+        }
+        Ok(dirs)
+    }
 });
 
 impl DirstateMap {
@@ -616,3 +623,7 @@
 pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
     PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
 }
+
+fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
+    PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
+}
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -52,9 +52,6 @@
     py: Python,
     entry: &DirstateEntry,
 ) -> PyResult<PyObject> {
-    // might be silly to retrieve capsule function in hot loop
-    let make = make_dirstate_item_capi::retrieve(py)?;
-
     let &DirstateEntry {
         state,
         mode,
@@ -65,22 +62,19 @@
     // because Into<u8> has a specific implementation while `as c_char` would
     // just do a naive enum cast.
     let state_code: u8 = state.into();
-
-    let maybe_obj = unsafe {
-        let ptr = make(state_code as c_char, mode, size, mtime);
-        PyObject::from_owned_ptr_opt(py, ptr)
-    };
-    maybe_obj.ok_or_else(|| PyErr::fetch(py))
+    make_dirstate_item_raw(py, state_code, mode, size, mtime)
 }
 
-// XXX a bit strange to have a dedicated function, but directory are not
-// treated as dirstate node by hg-core for now so…
-pub fn make_directory_item(py: Python, mtime: i32) -> PyResult<PyObject> {
-    // might be silly to retrieve capsule function in hot loop
+pub fn make_dirstate_item_raw(
+    py: Python,
+    state: u8,
+    mode: i32,
+    size: i32,
+    mtime: i32,
+) -> PyResult<PyObject> {
     let make = make_dirstate_item_capi::retrieve(py)?;
-
     let maybe_obj = unsafe {
-        let ptr = make(b'd' as c_char, 0 as i32, 0 as i32, mtime);
+        let ptr = make(state as c_char, mode, size, mtime);
         PyObject::from_owned_ptr_opt(py, ptr)
     };
     maybe_obj.ok_or_else(|| PyErr::fetch(py))
diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs
@@ -259,20 +259,40 @@
     /// are `Result`s.
     fn iter(&self) -> StateMapIter<'_>;
 
-    /// In the tree dirstate, return an iterator of "directory" (entry-less)
-    /// nodes with the data stored for them. This is for `hg debugdirstate
-    /// --dirs`.
+    /// Returns an iterator of tracked directories.
     ///
-    /// In the flat dirstate, returns an empty iterator.
+    /// This is the paths for which `has_tracked_dir` would return true.
+    /// Or, in other words, the union of ancestor paths of all paths that have
+    /// an associated entry in a "tracked" state in this dirstate map.
     ///
     /// Because parse errors can happen during iteration, the iterated items
     /// are `Result`s.
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    >;
+
+    /// Return an iterator of `(path, (state, mode, size, mtime))` for every
+    /// node stored in this dirstate map, for the purpose of the `hg
+    /// debugdirstate` command.
+    ///
+    /// For nodes that don’t have an entry, `state` is the ASCII space.
+    /// An `mtime` may still be present. It is used to optimize `status`.
+    ///
+    /// Because parse errors can happen during iteration, the iterated items
+    /// are `Result`s.
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
@@ -476,17 +496,41 @@
         Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value))))
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        self.set_all_dirs()?;
+        Ok(Box::new(
+            self.all_dirs
+                .as_ref()
+                .unwrap()
+                .iter()
+                .map(|path| Ok(&**path)),
+        ))
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        Box::new(std::iter::empty())
+        Box::new(
+            (&**self)
+                .iter()
+                .map(|(path, entry)| Ok((&**path, entry.debug_tuple()))),
+        )
     }
 }
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -1246,27 +1246,50 @@
         }))
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        let on_disk = self.on_disk;
+        Ok(Box::new(filter_map_results(
+            self.iter_nodes(),
+            move |node| {
+                Ok(if node.tracked_descendants_count() > 0 {
+                    Some(node.full_path(on_disk)?)
+                } else {
+                    None
+                })
+            },
+        )))
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        Box::new(filter_map_results(self.iter_nodes(), move |node| {
-            Ok(if node.state()?.is_none() {
-                Some((
-                    node.full_path(self.on_disk)?,
-                    node.cached_directory_mtime()
-                        .map(|mtime| Timestamp(mtime.seconds())),
-                ))
+        Box::new(self.iter_nodes().map(move |node| {
+            let node = node?;
+            let debug_tuple = if let Some(entry) = node.entry()? {
+                entry.debug_tuple()
+            } else if let Some(mtime) = node.cached_directory_mtime() {
+                (b' ', 0, -1, mtime.seconds() as i32)
             } else {
-                None
-            })
+                (b' ', 0, -1, -1)
+            };
+            Ok((node.full_path(self.on_disk)?, debug_tuple))
         }))
     }
 }
diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs
--- a/rust/hg-core/src/dirstate.rs
+++ b/rust/hg-core/src/dirstate.rs
@@ -65,6 +65,12 @@
         let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
         dirstate_exec_bit != fs_exec_bit
     }
+
+    /// Returns a `(state, mode, size, mtime)` tuple as for
+    /// `DirstateMapMethods::debug_iter`.
+    pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
+        (self.state.into(), self.mode, self.size, self.mtime)
+    }
 }
 
 #[derive(BytesCast)]
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -105,10 +105,6 @@
         self._map
         return self.copymap
 
-    def directories(self):
-        # Rust / dirstate-v2 only
-        return []
-
     def clear(self):
         self._map.clear()
         self.copymap.clear()
@@ -126,6 +122,8 @@
     # forward for python2,3 compat
     iteritems = items
 
+    debug_iter = items
+
     def __len__(self):
         return len(self._map)
 
@@ -527,6 +525,9 @@
         def directories(self):
             return self._rustmap.directories()
 
+        def debug_iter(self):
+            return self._rustmap.debug_iter()
+
         def preload(self):
             self._rustmap
 
@@ -737,6 +738,6 @@
         def dirfoldmap(self):
             f = {}
             normcase = util.normcase
-            for name, _pseudo_entry in self.directories():
+            for name in self._rustmap.tracked_dirs():
                 f[normcase(name)] = name
             return f
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -942,7 +942,7 @@
         ),
         (b'', b'dates', True, _(b'display the saved mtime')),
         (b'', b'datesort', None, _(b'sort by saved mtime')),
-        (b'', b'dirs', False, _(b'display directories')),
+        (b'', b'all', False, _(b'display dirstate-v2 tree nodes that would not exist in v1')),
     ],
     _(b'[OPTION]...'),
 )
@@ -961,9 +961,10 @@
         )  # sort by mtime, then by filename
     else:
         keyfunc = None  # sort by filename
-    entries = list(pycompat.iteritems(repo.dirstate))
-    if opts['dirs']:
-        entries.extend(repo.dirstate.directories())
+    if opts['all']:
+        entries = list(repo.dirstate._map.debug_iter())
+    else:
+        entries = list(pycompat.iteritems(repo.dirstate))
     entries.sort(key=keyfunc)
     for file_, ent in entries:
         if ent.v1_mtime() == -1:



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list