[Request] [++- ] D10907: dirstate-v2: Drop cached read_dir results after .hgignore changes

SimonSapin phabricator at mercurial-scm.org
Thu Jun 24 20:52:37 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Soon we’ll want the status algorithm to be able to skip `std::fs::read_dir` in
  more cases, notabling when listing unknown files but not ignored files.
  
  When ignore patterns change (which we detect by their hash, added to the
  dirstate-v2 format in a previous changeset), a formerly-ignored file could
  become unknown without changing its parent directory’s modification time.
  Therefore we remove any directory mtime from the dirstate, effictively
  invalidating the existing caches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10907

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -6,7 +6,6 @@
 use crate::dirstate_tree::dirstate_map::NodeRef;
 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
 use crate::dirstate_tree::on_disk::Timestamp;
-use crate::dirstate_tree::path_with_basename::WithBasename;
 use crate::matchers::get_ignore_function;
 use crate::matchers::Matcher;
 use crate::utils::files::get_bytes_from_os_string;
@@ -70,6 +69,7 @@
         outcome: Default::default(),
         ignore_patterns_have_changed: patterns_changed,
         new_cachable_directories: Default::default(),
+        outated_cached_directories: Default::default(),
         filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
     };
     let is_at_repo_root = true;
@@ -91,18 +91,22 @@
     )?;
     let mut outcome = common.outcome.into_inner().unwrap();
     let new_cachable = common.new_cachable_directories.into_inner().unwrap();
+    let outdated = common.outated_cached_directories.into_inner().unwrap();
 
     outcome.dirty = common.ignore_patterns_have_changed == Some(true)
+        || !outdated.is_empty()
         || !new_cachable.is_empty();
 
+    // Remove outdated mtimes before adding new mtimes, in case a given
+    // directory is both
+    for path in &outdated {
+        let node = dmap.get_or_insert(path)?;
+        if let NodeData::CachedDirectory { .. } = &node.data {
+            node.data = NodeData::None
+        }
+    }
     for (path, mtime) in &new_cachable {
-        let node = DirstateMap::get_or_insert_node(
-            dmap.on_disk,
-            &mut dmap.root,
-            path,
-            WithBasename::to_cow_owned,
-            |_| {},
-        )?;
+        let node = dmap.get_or_insert(path)?;
         match &node.data {
             NodeData::Entry(_) => {} // Don’t overwrite an entry
             NodeData::CachedDirectory { .. } | NodeData::None => {
@@ -123,6 +127,7 @@
     ignore_fn: IgnoreFnType<'a>,
     outcome: Mutex<DirstateStatus<'on_disk>>,
     new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
+    outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
 
     /// Whether ignore files like `.hgignore` have changed since the previous
     /// time a `status()` call wrote their hash to the dirstate. `None` means
@@ -155,6 +160,22 @@
             .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
     }
 
+    fn check_for_outdated_directory_cache(
+        &self,
+        dirstate_node: &NodeRef<'tree, 'on_disk>,
+    ) -> Result<(), DirstateV2ParseError> {
+        if self.ignore_patterns_have_changed == Some(true)
+            && dirstate_node.cached_directory_mtime().is_some()
+        {
+            self.outated_cached_directories.lock().unwrap().push(
+                dirstate_node
+                    .full_path_borrowed(self.dmap.on_disk)?
+                    .detach_from_tree(),
+            )
+        }
+        Ok(())
+    }
+
     /// If this returns true, we can get accurate results by only using
     /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
     /// need to call `read_dir`.
@@ -304,6 +325,7 @@
         dirstate_node: NodeRef<'tree, 'on_disk>,
         has_ignored_ancestor: bool,
     ) -> Result<(), DirstateV2ParseError> {
+        self.check_for_outdated_directory_cache(&dirstate_node)?;
         let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
         let file_type = fs_metadata.file_type();
         let file_or_symlink = file_type.is_file() || file_type.is_symlink();
@@ -521,6 +543,7 @@
         &self,
         dirstate_node: NodeRef<'tree, 'on_disk>,
     ) -> Result<(), DirstateV2ParseError> {
+        self.check_for_outdated_directory_cache(&dirstate_node)?;
         self.mark_removed_or_deleted_if_file(
             &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
             dirstate_node.state()?,
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -495,6 +495,19 @@
         }
     }
 
+    pub(super) fn get_or_insert<'tree, 'path>(
+        &'tree mut self,
+        path: &HgPath,
+    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
+        Self::get_or_insert_node(
+            self.on_disk,
+            &mut self.root,
+            path,
+            WithBasename::to_cow_owned,
+            |_| {},
+        )
+    }
+
     pub(super) fn get_or_insert_node<'tree, 'path>(
         on_disk: &'on_disk [u8],
         root: &'tree mut ChildNodes<'on_disk>,



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20210624/38a99dc4/attachment-0001.html>


More information about the Mercurial-patches mailing list