D11796: dirstate: remove need_delay logic

marmoute (Pierre-Yves David) phabricator at mercurial-scm.org
Wed Nov 24 11:16:28 UTC 2021


marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Now that all¹ stored mtime are non ambiguous, we no longer need to apply the `need_delay` step.
  The need delay logic was not great are mtime gathered during longer operation
  could be ambiguous but younger than the `dirstate.write` call time.
  
  So, we don't need that logic anymore and can drop it
  
  [1] except the ones from `hg update`, but `need_delay` no longer help for them
  either.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11796

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/dirstate.py
  mercurial/dirstatemap.py
  mercurial/dirstateutils/v2.py
  mercurial/pure/parsers.py
  rust/hg-core/src/dirstate/entry.rs
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/item.rs
  tests/fakedirstatewritetime.py
  tests/test-merge1.t
  tests/test-subrepo.t

CHANGE DETAILS

diff --git a/tests/test-subrepo.t b/tests/test-subrepo.t
--- a/tests/test-subrepo.t
+++ b/tests/test-subrepo.t
@@ -1021,37 +1021,19 @@
 
 test if untracked file is not overwritten
 
-(this also tests that updated .hgsubstate is treated as "modified",
-when 'merge.update()' is aborted before 'merge.recordupdates()', even
-if none of mode, size and timestamp of it isn't changed on the
-filesystem (see also issue4583))
+(this tests also has a change to update .hgsubstate and merge it within the same second. It should mark is are modified , even if none of mode, size and timestamp of it isn't changed on the filesystem (see also issue4583))
 
   $ echo issue3276_ok > repo/s/b
   $ hg -R repo2 push -f -q
-  $ touch -t 200001010000 repo/.hgsubstate
 
-  $ cat >> repo/.hg/hgrc <<EOF
-  > [fakedirstatewritetime]
-  > # emulate invoking dirstate.write() via repo.status()
-  > # at 2000-01-01 00:00
-  > fakenow = 200001010000
-  > 
-  > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
-  > EOF
   $ hg -R repo update
   b: untracked file differs
   abort: untracked files in working directory differ from files in requested revision (in subrepository "s")
   [255]
-  $ cat >> repo/.hg/hgrc <<EOF
-  > [extensions]
-  > fakedirstatewritetime = !
-  > EOF
 
   $ cat repo/s/b
   issue3276_ok
   $ rm repo/s/b
-  $ touch -t 200001010000 repo/.hgsubstate
   $ hg -R repo revert --all
   reverting repo/.hgsubstate
   reverting subrepo s
diff --git a/tests/test-merge1.t b/tests/test-merge1.t
--- a/tests/test-merge1.t
+++ b/tests/test-merge1.t
@@ -349,6 +349,8 @@
 aren't changed), even if none of mode, size and timestamp of them
 isn't changed on the filesystem (see also issue4583).
 
+This test is now "best effort" as the mechanism to prevent such race are getting better, it get more complicated to test a specific scénarion that would trigger it. If you see flakyness here, there is a race.
+
   $ cat > $TESTTMP/abort.py <<EOF
   > from __future__ import absolute_import
   > # emulate aborting before "recordupdates()". in this case, files
@@ -365,13 +367,6 @@
   >     extensions.wrapfunction(merge, "applyupdates", applyupdates)
   > EOF
 
-  $ cat >> .hg/hgrc <<EOF
-  > [fakedirstatewritetime]
-  > # emulate invoking dirstate.write() via repo.status()
-  > # at 2000-01-01 00:00
-  > fakenow = 200001010000
-  > EOF
-
 (file gotten from other revision)
 
   $ hg update -q -C 2
@@ -381,12 +376,8 @@
   $ hg update -q -C 3
   $ cat b
   This is file b1
-  $ touch -t 200001010000 b
-  $ hg debugrebuildstate
-
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
   > abort = $TESTTMP/abort.py
   > EOF
   $ hg merge 5
@@ -394,13 +385,11 @@
   [255]
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = !
   > abort = !
   > EOF
 
   $ cat b
   THIS IS FILE B5
-  $ touch -t 200001010000 b
   $ hg status -A b
   M b
 
@@ -413,12 +402,10 @@
 
   $ cat b
   this is file b6
-  $ touch -t 200001010000 b
-  $ hg debugrebuildstate
+  $ hg status
 
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
   > abort = $TESTTMP/abort.py
   > EOF
   $ hg merge --tool internal:other 5
@@ -426,13 +413,11 @@
   [255]
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = !
   > abort = !
   > EOF
 
   $ cat b
   THIS IS FILE B5
-  $ touch -t 200001010000 b
   $ hg status -A b
   M b
 
diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -37,14 +37,8 @@
 has_rust_dirstate = policy.importrust('dirstate') is not None
 
 
-def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
-    # execute what original parsers.pack_dirstate should do actually
-    # for consistency
-    for f, e in dmap.items():
-        if e.need_delay(now):
-            e.set_possibly_dirty()
-
-    return orig(dmap, copymap, pl, fakenow)
+def pack_dirstate(orig, dmap, copymap, pl):
+    return orig(dmap, copymap, pl)
 
 
 def fakewrite(ui, func):
@@ -67,19 +61,19 @@
         # The Rust implementation does not use public parse/pack dirstate
         # to prevent conversion round-trips
         orig_dirstatemap_write = dirstatemapmod.dirstatemap.write
-        wrapper = lambda self, tr, st, now: orig_dirstatemap_write(
-            self, tr, st, fakenow
-        )
+        wrapper = lambda self, tr, st: orig_dirstatemap_write(self, tr, st)
         dirstatemapmod.dirstatemap.write = wrapper
 
     orig_get_fs_now = timestamp.get_fs_now
-    wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
+    wrapper = lambda *args: pack_dirstate(orig_pack_dirstate, *args)
 
     orig_module = parsers
     orig_pack_dirstate = parsers.pack_dirstate
 
     orig_module.pack_dirstate = wrapper
-    timestamp.get_fs_now = lambda *args: fakenow
+    timestamp.get_fs_now = (
+        lambda *args: fakenow
+    )  # XXX useless for this purpose now
     try:
         return func()
     finally:
diff --git a/rust/hg-cpython/src/dirstate/item.rs b/rust/hg-cpython/src/dirstate/item.rs
--- a/rust/hg-cpython/src/dirstate/item.rs
+++ b/rust/hg-cpython/src/dirstate/item.rs
@@ -194,11 +194,6 @@
         Ok(mtime)
     }
 
-    def need_delay(&self, now: (u32, u32)) -> PyResult<bool> {
-        let now = timestamp(py, now)?;
-        Ok(self.entry(py).get().need_delay(now))
-    }
-
     def mtime_likely_equal_to(&self, other: (u32, u32)) -> PyResult<bool> {
         if let Some(mtime) = self.entry(py).get().truncated_mtime() {
             Ok(mtime.likely_equal(timestamp(py, other)?))
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -18,7 +18,7 @@
 
 use crate::{
     dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
-    dirstate::item::{timestamp, DirstateItem},
+    dirstate::item::DirstateItem,
     pybytes_deref::PyBytesDeref,
 };
 use hg::{
@@ -194,16 +194,13 @@
         &self,
         p1: PyObject,
         p2: PyObject,
-        now: (u32, u32)
     ) -> PyResult<PyBytes> {
-        let now = timestamp(py, now)?;
-
         let mut inner = self.inner(py).borrow_mut();
         let parents = DirstateParents {
             p1: extract_node_id(py, &p1)?,
             p2: extract_node_id(py, &p2)?,
         };
-        let result = inner.pack_v1(parents, now);
+        let result = inner.pack_v1(parents);
         match result {
             Ok(packed) => Ok(PyBytes::new(py, &packed)),
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
@@ -218,13 +215,10 @@
     /// instead of written to a new data file (False).
     def write_v2(
         &self,
-        now: (u32, u32),
         can_append: bool,
     ) -> PyResult<PyObject> {
-        let now = timestamp(py, now)?;
-
         let mut inner = self.inner(py).borrow_mut();
-        let result = inner.pack_v2(now, can_append);
+        let result = inner.pack_v2(can_append);
         match result {
             Ok((packed, tree_metadata, append)) => {
                 let packed = PyBytes::new(py, &packed);
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -677,25 +677,6 @@
         })
     }
 
-    fn clear_known_ambiguous_mtimes(
-        &mut self,
-        paths: &[impl AsRef<HgPath>],
-    ) -> Result<(), DirstateV2ParseError> {
-        for path in paths {
-            if let Some(node) = Self::get_node_mut(
-                self.on_disk,
-                &mut self.unreachable_bytes,
-                &mut self.root,
-                path.as_ref(),
-            )? {
-                if let NodeData::Entry(entry) = &mut node.data {
-                    entry.set_possibly_dirty();
-                }
-            }
-        }
-        Ok(())
-    }
-
     fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
         if let Cow::Borrowed(path) = path {
             *unreachable_bytes += path.len() as u32
@@ -930,29 +911,20 @@
     pub fn pack_v1(
         &mut self,
         parents: DirstateParents,
-        now: TruncatedTimestamp,
     ) -> Result<Vec<u8>, DirstateError> {
         let map = self.get_map_mut();
-        let mut ambiguous_mtimes = Vec::new();
         // Optizimation (to be measured?): pre-compute size to avoid `Vec`
         // reallocations
         let mut size = parents.as_bytes().len();
         for node in map.iter_nodes() {
             let node = node?;
-            if let Some(entry) = node.entry()? {
+            if node.entry()?.is_some() {
                 size += packed_entry_size(
                     node.full_path(map.on_disk)?,
                     node.copy_source(map.on_disk)?,
                 );
-                if entry.need_delay(now) {
-                    ambiguous_mtimes.push(
-                        node.full_path_borrowed(map.on_disk)?
-                            .detach_from_tree(),
-                    )
-                }
             }
         }
-        map.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
 
         let mut packed = Vec::with_capacity(size);
         packed.extend(parents.as_bytes());
@@ -978,26 +950,9 @@
     #[timed]
     pub fn pack_v2(
         &mut self,
-        now: TruncatedTimestamp,
         can_append: bool,
     ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
         let map = self.get_map_mut();
-        let mut paths = Vec::new();
-        for node in map.iter_nodes() {
-            let node = node?;
-            if let Some(entry) = node.entry()? {
-                if entry.need_delay(now) {
-                    paths.push(
-                        node.full_path_borrowed(map.on_disk)?
-                            .detach_from_tree(),
-                    )
-                }
-            }
-        }
-        // Borrow of `self` ends here since we collect cloned paths
-
-        map.clear_known_ambiguous_mtimes(&paths)?;
-
         on_disk::write(map, can_append)
     }
 
diff --git a/rust/hg-core/src/dirstate/entry.rs b/rust/hg-core/src/dirstate/entry.rs
--- a/rust/hg-core/src/dirstate/entry.rs
+++ b/rust/hg-core/src/dirstate/entry.rs
@@ -592,16 +592,6 @@
     pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
         (self.state().into(), self.mode(), self.size(), self.mtime())
     }
-
-    /// True if the stored mtime would be ambiguous with the current time
-    pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
-        if let Some(mtime) = self.mtime {
-            self.state() == EntryState::Normal
-                && mtime.truncated_seconds() == now.truncated_seconds()
-        } else {
-            false
-        }
-    }
 }
 
 impl EntryState {
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -536,10 +536,6 @@
         else:
             return self._mtime_s
 
-    def need_delay(self, now):
-        """True if the stored mtime would be ambiguous with the current time"""
-        return self.v1_state() == b'n' and self._mtime_s == now[0]
-
 
 def gettype(q):
     return int(q & 0xFFFF)
@@ -905,23 +901,11 @@
     return parents
 
 
-def pack_dirstate(dmap, copymap, pl, now):
+def pack_dirstate(dmap, copymap, pl):
     cs = stringio()
     write = cs.write
     write(b"".join(pl))
     for f, e in pycompat.iteritems(dmap):
-        if e.need_delay(now):
-            # The file was last modified "simultaneously" with the current
-            # write to dirstate (i.e. within the same second for file-
-            # systems with a granularity of 1 sec). This commonly happens
-            # for at least a couple of files on 'update'.
-            # The user could change the file without changing its size
-            # within the same second. Invalidate the file's mtime in
-            # dirstate, forcing future 'status' calls to compare the
-            # contents of the file if the size is the same. This prevents
-            # mistakenly treating such files as clean.
-            e.set_possibly_dirty()
-
         if f in copymap:
             f = b"%s\0%s" % (f, copymap[f])
         e = _pack(
diff --git a/mercurial/dirstateutils/v2.py b/mercurial/dirstateutils/v2.py
--- a/mercurial/dirstateutils/v2.py
+++ b/mercurial/dirstateutils/v2.py
@@ -174,12 +174,10 @@
         )
 
 
-def pack_dirstate(map, copy_map, now):
+def pack_dirstate(map, copy_map):
     """
     Pack `map` and `copy_map` into the dirstate v2 binary format and return
     the bytearray.
-    `now` is a timestamp of the current filesystem time used to detect race
-    conditions in writing the dirstate to disk, see inline comment.
 
     The on-disk format expects a tree-like structure where the leaves are
     written first (and sorted per-directory), going up levels until the root
@@ -284,17 +282,6 @@
     stack.append(current_node)
 
     for index, (path, entry) in enumerate(sorted_map, 1):
-        if entry.need_delay(now):
-            # The file was last modified "simultaneously" with the current
-            # write to dirstate (i.e. within the same second for file-
-            # systems with a granularity of 1 sec). This commonly happens
-            # for at least a couple of files on 'update'.
-            # The user could change the file without changing its size
-            # within the same second. Invalidate the file's mtime in
-            # dirstate, forcing future 'status' calls to compare the
-            # contents of the file if the size is the same. This prevents
-            # mistakenly treating such files as clean.
-            entry.set_possibly_dirty()
         nodes_with_entry_count += 1
         if path in copy_map:
             nodes_with_copy_source_count += 1
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -446,11 +446,11 @@
 
     def write(self, tr, st, now):
         if self._use_dirstate_v2:
-            packed, meta = v2.pack_dirstate(self._map, self.copymap, now)
+            packed, meta = v2.pack_dirstate(self._map, self.copymap)
             self.write_v2_no_append(tr, st, meta, packed)
         else:
             packed = parsers.pack_dirstate(
-                self._map, self.copymap, self.parents(), now
+                self._map, self.copymap, self.parents()
             )
             st.write(packed)
             st.close()
@@ -658,7 +658,7 @@
         def write(self, tr, st, now):
             if not self._use_dirstate_v2:
                 p1, p2 = self.parents()
-                packed = self._map.write_v1(p1, p2, now)
+                packed = self._map.write_v1(p1, p2)
                 st.write(packed)
                 st.close()
                 self._dirtyparents = False
@@ -666,7 +666,7 @@
 
             # We can only append to an existing data file if there is one
             can_append = self.docket.uuid is not None
-            packed, meta, append = self._map.write_v2(now, can_append)
+            packed, meta, append = self._map.write_v2(can_append)
             if append:
                 docket = self.docket
                 data_filename = docket.data_filename()
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -779,25 +779,6 @@
             # filesystem's notion of 'now'
             now = timestamp.mtime_of(util.fstat(st))
 
-        # enough 'delaywrite' prevents 'pack_dirstate' from dropping
-        # timestamp of each entries in dirstate, because of 'now > mtime'
-        delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
-        if delaywrite > 0:
-            # do we have any files to delay for?
-            for f, e in pycompat.iteritems(self._map):
-                if e.need_delay(now):
-                    import time  # to avoid useless import
-
-                    # rather than sleep n seconds, sleep until the next
-                    # multiple of n seconds
-                    clock = time.time()
-                    start = int(clock) - (int(clock) % delaywrite)
-                    end = start + delaywrite
-                    time.sleep(end - clock)
-                    # trust our estimate that the end is near now
-                    now = timestamp.timestamp((end, 0))
-                    break
-
         self._map.write(tr, st, now)
         self._dirty = False
 
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -320,21 +320,6 @@
 	return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
 };
 
-static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
-                                          PyObject *now)
-{
-	int now_s;
-	int now_ns;
-	if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
-		return NULL;
-	}
-	if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
-		Py_RETURN_TRUE;
-	} else {
-		Py_RETURN_FALSE;
-	}
-};
-
 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
                                                      PyObject *other)
 {
@@ -548,8 +533,6 @@
      "return a \"size\" suitable for v1 serialization"},
     {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
      "return a \"mtime\" suitable for v1 serialization"},
-    {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
-     "True if the stored mtime would be ambiguous with the current time"},
     {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
      METH_O, "True if the stored mtime is likely equal to the given mtime"},
     {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
@@ -922,12 +905,9 @@
 	Py_ssize_t nbytes, pos, l;
 	PyObject *k, *v = NULL, *pn;
 	char *p, *s;
-	int now_s;
-	int now_ns;
 
-	if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
-	                      &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
-	                      &now_s, &now_ns)) {
+	if (!PyArg_ParseTuple(args, "O!O!O!:pack_dirstate", &PyDict_Type, &map,
+	                      &PyDict_Type, &copymap, &PyTuple_Type, &pl)) {
 		return NULL;
 	}
 
@@ -996,21 +976,6 @@
 		mode = dirstate_item_c_v1_mode(tuple);
 		size = dirstate_item_c_v1_size(tuple);
 		mtime = dirstate_item_c_v1_mtime(tuple);
-		if (state == 'n' && tuple->mtime_s == now_s) {
-			/* See pure/parsers.py:pack_dirstate for why we do
-			 * this. */
-			mtime = -1;
-			mtime_unset = (PyObject *)dirstate_item_from_v1_data(
-			    state, mode, size, mtime);
-			if (!mtime_unset) {
-				goto bail;
-			}
-			if (PyDict_SetItem(map, k, mtime_unset) == -1) {
-				goto bail;
-			}
-			Py_DECREF(mtime_unset);
-			mtime_unset = NULL;
-		}
 		*p++ = state;
 		putbe32((uint32_t)mode, p);
 		putbe32((uint32_t)size, p + 4);



To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list