[Updated] [++- ] D11840: rhg: Update the dirstate on disk after status

SimonSapin phabricator at mercurial-scm.org
Mon Dec 6 23:49:23 UTC 2021


SimonSapin updated this revision to Diff 31326.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11840?vs=31321&id=31326

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11840/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11840

AFFECTED FILES
  mercurial/dirstateutils/timestamp.py
  rust/hg-core/src/dirstate/entry.rs
  rust/hg-core/src/dirstate/status.rs
  rust/hg-core/src/dirstate_tree/status.rs
  rust/rhg/src/commands/status.rs
  tests/test-backout.t
  tests/test-dirstate-race2.t
  tests/test-dirstate.t

CHANGE DETAILS

diff --git a/tests/test-dirstate.t b/tests/test-dirstate.t
--- a/tests/test-dirstate.t
+++ b/tests/test-dirstate.t
@@ -9,10 +9,6 @@
   > EOF
 #endif
 
-TODO: fix rhg bugs that make this test fail when status is enabled
-  $ unset RHG_STATUS
-
-
 ------ Test dirstate._dirs refcounting
 
   $ hg init t
diff --git a/tests/test-dirstate-race2.t b/tests/test-dirstate-race2.t
--- a/tests/test-dirstate-race2.t
+++ b/tests/test-dirstate-race2.t
@@ -9,9 +9,6 @@
   > EOF
 #endif
 
-TODO: fix rhg bugs that make this test fail when status is enabled
-  $ unset RHG_STATUS
-
 Checking the size/permissions/file-type of files stored in the
 dirstate after an update where the files are changed concurrently
 outside of hg's control.
diff --git a/tests/test-backout.t b/tests/test-backout.t
--- a/tests/test-backout.t
+++ b/tests/test-backout.t
@@ -172,8 +172,7 @@
   $ hg status -A
   C c
   $ hg debugstate --no-dates
-  n 644         12 set                 c (no-rhg !)
-  n   0         -1 unset               c (rhg known-bad-output !)
+  n 644         12 set                 c
   $ hg backout -d '6 0' -m 'to be rollback-ed soon' -r .
   removing c
   adding b
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -13,14 +13,18 @@
 use hg;
 use hg::config::Config;
 use hg::dirstate::has_exec_bit;
-use hg::errors::HgError;
+use hg::dirstate::TruncatedTimestamp;
+use hg::dirstate::RANGE_MASK_31BIT;
+use hg::errors::{HgError, IoResultExt};
+use hg::lock::LockError;
 use hg::manifest::Manifest;
 use hg::matchers::AlwaysMatcher;
 use hg::repo::Repo;
 use hg::utils::files::get_bytes_from_os_string;
-use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
+use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
 use hg::{HgPathCow, StatusOptions};
 use log::{info, warn};
+use std::io;
 
 pub const HELP_TEXT: &str = "
 Show changed files in the working directory
@@ -229,6 +233,7 @@
             &ds_status.unsure
         );
     }
+    let mut fixup = Vec::new();
     if !ds_status.unsure.is_empty()
         && (display_states.modified || display_states.clean)
     {
@@ -243,8 +248,9 @@
                 }
             } else {
                 if display_states.clean {
-                    ds_status.clean.push(to_check);
+                    ds_status.clean.push(to_check.clone());
                 }
+                fixup.push(to_check.into_owned())
             }
         }
     }
@@ -318,6 +324,71 @@
             b"C",
         )?;
     }
+
+    let mut dirstate_write_needed = ds_status.dirty;
+    let filesystem_time_at_status_start = ds_status
+        .filesystem_time_at_status_start
+        .map(TruncatedTimestamp::from);
+
+    if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
+        && !dirstate_write_needed
+    {
+        // Nothing to update
+        return Ok(());
+    }
+
+    // Update the dirstate on disk if we can
+    let with_lock_result =
+        repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
+            if let Some(mtime_boundary) = filesystem_time_at_status_start {
+                for hg_path in fixup {
+                    use std::os::unix::fs::MetadataExt;
+                    let fs_path = hg_path_to_path_buf(&hg_path)
+                        .expect("HgPath conversion");
+                    // Specifically do not reuse `fs_metadata` from
+                    // `unsure_is_clean` which was needed before reading
+                    // contents. Here we access metadata again after reading
+                    // content, in case it changed in the meantime.
+                    let fs_metadata = repo
+                        .working_directory_vfs()
+                        .symlink_metadata(&fs_path)?;
+                    let mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
+                        .when_reading_file(&fs_path)?;
+                    if mtime.is_reliable_mtime(&mtime_boundary) {
+                        let mode = fs_metadata.mode();
+                        let size = fs_metadata.len() as u32 & RANGE_MASK_31BIT;
+                        let mut entry = dmap
+                            .get(&hg_path)?
+                            .expect("ambiguous file not in dirstate");
+                        entry.set_clean(mode, size, mtime);
+                        dmap.add_file(&hg_path, entry)?;
+                        dirstate_write_needed = true
+                    }
+                }
+            }
+            drop(dmap); // Avoid "already mutably borrowed" RefCell panics
+            if dirstate_write_needed {
+                repo.write_dirstate()?
+            }
+            Ok(())
+        });
+    match with_lock_result {
+        Ok(closure_result) => closure_result?,
+        Err(LockError::AlreadyHeld) => {
+            // Not updating the dirstate is not ideal but not critical:
+            // don’t keep our caller waiting until some other Mercurial
+            // process releases the lock.
+        }
+        Err(LockError::Other(HgError::IoError { error, .. }))
+            if error.kind() == io::ErrorKind::PermissionDenied =>
+        {
+            // `hg status` on a read-only repository is fine
+        }
+        Err(LockError::Other(error)) => {
+            // Report other I/O errors
+            Err(error)?
+        }
+    }
     Ok(())
 }
 
@@ -368,7 +439,7 @@
     hg_path: &HgPath,
 ) -> Result<bool, HgError> {
     let vfs = repo.working_directory_vfs();
-    let fs_path = hg_path_to_os_string(hg_path).expect("HgPath conversion");
+    let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
     let fs_metadata = vfs.symlink_metadata(&fs_path)?;
     let is_symlink = fs_metadata.file_type().is_symlink();
     // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
@@ -399,5 +470,5 @@
     } else {
         vfs.read(fs_path)?
     };
-    return Ok(contents_in_p1 != &*fs_contents);
+    Ok(contents_in_p1 != &*fs_contents)
 }
diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -61,16 +61,21 @@
             (Box::new(|&_| true), vec![], None)
         };
 
+    let filesystem_time_at_status_start = filesystem_now(&root_dir).ok();
+    let outcome = DirstateStatus {
+        filesystem_time_at_status_start,
+        ..Default::default()
+    };
     let common = StatusCommon {
         dmap,
         options,
         matcher,
         ignore_fn,
-        outcome: Default::default(),
+        outcome: Mutex::new(outcome),
         ignore_patterns_have_changed: patterns_changed,
         new_cachable_directories: Default::default(),
         outated_cached_directories: Default::default(),
-        filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
+        filesystem_time_at_status_start,
     };
     let is_at_repo_root = true;
     let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -73,6 +73,10 @@
 
 #[derive(Debug, Default)]
 pub struct DirstateStatus<'a> {
+    /// The current time at the start of the `status()` algorithm, as measured
+    /// and possibly truncated by the filesystem.
+    pub filesystem_time_at_status_start: Option<std::time::SystemTime>,
+
     /// Tracked files whose contents have changed since the parent revision
     pub modified: Vec<HgPathCow<'a>>,
 
diff --git a/rust/hg-core/src/dirstate/entry.rs b/rust/hg-core/src/dirstate/entry.rs
--- a/rust/hg-core/src/dirstate/entry.rs
+++ b/rust/hg-core/src/dirstate/entry.rs
@@ -102,6 +102,35 @@
         }
     }
 
+    /// Returns whether this timestamp is reliable as the "mtime" of a file.
+    ///
+    /// A modification time is reliable if it is older than `boundary` (or
+    /// sufficiently in the future).
+    ///
+    /// Otherwise a concurrent modification might happens with the same mtime.
+    pub fn is_reliable_mtime(&self, boundary: &Self) -> bool {
+        // If the mtime of the ambiguous file is younger (or equal) to the
+        // starting point of the `status` walk, we cannot garantee that
+        // another, racy, write will not happen right after with the same mtime
+        // and we cannot cache the information.
+        //
+        // However if the mtime is far away in the future, this is likely some
+        // mismatch between the current clock and previous file system
+        // operation. So mtime more than one days in the future are considered
+        // fine.
+        if self.truncated_seconds == boundary.truncated_seconds {
+            self.nanoseconds != 0
+                && boundary.nanoseconds != 0
+                && self.nanoseconds < boundary.nanoseconds
+        } else {
+            // `truncated_seconds` is less than 2**31,
+            // so this does not overflow `u32`:
+            let one_day_later = boundary.truncated_seconds + 24 * 3600;
+            self.truncated_seconds < boundary.truncated_seconds
+                || self.truncated_seconds > one_day_later
+        }
+    }
+
     /// The lower 31 bits of the number of seconds since the epoch.
     pub fn truncated_seconds(&self) -> u32 {
         self.truncated_seconds
@@ -191,7 +220,7 @@
 }
 
 const NSEC_PER_SEC: u32 = 1_000_000_000;
-const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
+pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
 
 pub const MTIME_UNSET: i32 = -1;
 
diff --git a/mercurial/dirstateutils/timestamp.py b/mercurial/dirstateutils/timestamp.py
--- a/mercurial/dirstateutils/timestamp.py
+++ b/mercurial/dirstateutils/timestamp.py
@@ -96,7 +96,7 @@
     """same as `mtime_of`, but return None if the date might be ambiguous
 
     A modification time is reliable if it is older than "present_time" (or
-    sufficiently in the futur).
+    sufficiently in the future).
 
     Otherwise a concurrent modification might happens with the same mtime.
     """



To: SimonSapin, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211206/09d4266a/attachment-0002.html>


More information about the Mercurial-patches mailing list