[Updated] D11794: status: use filesystem time boundary to invalidate racy mtime

marmoute (Pierre-Yves David) phabricator at mercurial-scm.org
Tue Nov 30 23:56:51 UTC 2021


marmoute edited the summary of this revision.
marmoute retitled this revision from "status: use filesystem time boundary to invalidate racy files" to "status: use filesystem time boundary to invalidate racy mtime".
marmoute updated this revision to Diff 31229.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11794?vs=31116&id=31229

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11794/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11794

AFFECTED FILES
  hgext/largefiles/lfutil.py
  hgext/largefiles/overrides.py
  hgext/largefiles/reposetup.py
  hgext/remotefilelog/__init__.py
  mercurial/context.py
  mercurial/dirstate.py
  mercurial/narrowspec.py
  tests/test-dirstate-race.t

CHANGE DETAILS

diff --git a/tests/test-dirstate-race.t b/tests/test-dirstate-race.t
--- a/tests/test-dirstate-race.t
+++ b/tests/test-dirstate-race.t
@@ -66,11 +66,11 @@
   > )
   > def extsetup(ui):
   >     extensions.wrapfunction(context.workingctx, '_checklookup', overridechecklookup)
-  > def overridechecklookup(orig, self, files):
+  > def overridechecklookup(orig, self, *args, **kwargs):
   >     # make an update that changes the dirstate from underneath
   >     self._repo.ui.system(br"sh '$TESTTMP/dirstaterace.sh'",
   >                          cwd=self._repo.root)
-  >     return orig(self, files)
+  >     return orig(self, *args, **kwargs)
   > EOF
 
   $ hg debugrebuilddirstate
diff --git a/mercurial/narrowspec.py b/mercurial/narrowspec.py
--- a/mercurial/narrowspec.py
+++ b/mercurial/narrowspec.py
@@ -323,7 +323,7 @@
     removedmatch = matchmod.differencematcher(oldmatch, newmatch)
 
     ds = repo.dirstate
-    lookup, status = ds.status(
+    lookup, status, _mtime_boundary = ds.status(
         removedmatch, subrepos=[], ignored=True, clean=True, unknown=True
     )
     trackeddirty = status.modified + status.added
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -1308,11 +1308,20 @@
             # Some matchers have yet to be implemented
             use_rust = False
 
+        # Get the time from the filesystem so we can disambiguate files that
+        # appear modified in the present or future.
+        try:
+            mtime_boundary = timestamp.get_fs_now(self._opener)
+        except OSError:
+            # In largefiles or readonly context
+            mtime_boundary = None
+
         if use_rust:
             try:
-                return self._rust_status(
+                res = self._rust_status(
                     match, listclean, listignored, listunknown
                 )
+                return res + (mtime_boundary,)
             except rustmod.FallbackError:
                 pass
 
@@ -1402,7 +1411,7 @@
         status = scmutil.status(
             modified, added, removed, deleted, unknown, ignored, clean
         )
-        return (lookup, status)
+        return (lookup, status, mtime_boundary)
 
     def matches(self, match):
         """
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1796,13 +1796,14 @@
             sane.append(f)
         return sane
 
-    def _checklookup(self, files):
+    def _checklookup(self, files, mtime_boundary):
         # check for any possibly clean files
         if not files:
-            return [], [], []
+            return [], [], [], []
 
         modified = []
         deleted = []
+        clean = []
         fixup = []
         pctx = self._parents[0]
         # do a full compare of any files that might have changed
@@ -1816,22 +1817,35 @@
                     or pctx[f].cmp(self[f])
                 ):
                     modified.append(f)
+                elif mtime_boundary is None:
+                    clean.append(f)
                 else:
-                    # XXX note that we have a race windows here since we gather
-                    # the stats after we compared so the file might have
-                    # changed.
-                    #
-                    # However this have always been the case because and the
-                    # refactoring moving the code here is improving the
-                    # situation by narrowing the race and moving the two steps
-                    # (comparison + stat) in the same location.
-                    #
-                    # Making this code "correct" is now possible.
                     s = self[f].lstat()
                     mode = s.st_mode
                     size = s.st_size
-                    mtime = timestamp.mtime_of(s)
-                    fixup.append((f, (mode, size, mtime)))
+                    file_mtime = timestamp.mtime_of(s)
+                    cache_info = (mode, size, file_mtime)
+
+                    file_second = file_mtime[0]
+                    boundary_second = mtime_boundary[0]
+                    # If the mtime of the ambiguous file is younger (or equal)
+                    # to the starting point of the `status` walk, we cannot
+                    # garantee that another, racy, write will not happen right
+                    # after with the same mtime and we cannot cache the
+                    # information.
+                    #
+                    # However is the mtime is far away in the future, this is
+                    # likely some mismatch between the current clock and
+                    # previous file system operation. So mtime more than one days
+                    # in the future are considered fine.
+                    if (
+                        boundary_second
+                        <= file_second
+                        < (3600 * 24 + boundary_second)
+                    ):
+                        clean.append(f)
+                    else:
+                        fixup.append((f, cache_info))
             except (IOError, OSError):
                 # A file become inaccessible in between? Mark it as deleted,
                 # matching dirstate behavior (issue5584).
@@ -1841,7 +1855,7 @@
                 # it's in the dirstate.
                 deleted.append(f)
 
-        return modified, deleted, fixup
+        return modified, deleted, clean, fixup
 
     def _poststatusfixup(self, status, fixup):
         """update dirstate for files that are actually clean"""
@@ -1895,17 +1909,21 @@
         subrepos = []
         if b'.hgsub' in self:
             subrepos = sorted(self.substate)
-        cmp, s = self._repo.dirstate.status(
+        cmp, s, mtime_boundary = self._repo.dirstate.status(
             match, subrepos, ignored=ignored, clean=clean, unknown=unknown
         )
 
         # check for any possibly clean files
         fixup = []
         if cmp:
-            modified2, deleted2, fixup = self._checklookup(cmp)
+            modified2, deleted2, clean_set, fixup = self._checklookup(
+                cmp, mtime_boundary
+            )
             s.modified.extend(modified2)
             s.deleted.extend(deleted2)
 
+            if clean_set and clean:
+                s.clean.extend(clean_set)
             if fixup and clean:
                 s.clean.extend((f for f, _ in fixup))
 
diff --git a/hgext/remotefilelog/__init__.py b/hgext/remotefilelog/__init__.py
--- a/hgext/remotefilelog/__init__.py
+++ b/hgext/remotefilelog/__init__.py
@@ -520,7 +520,7 @@
 
 
 # Prefetch files before status attempts to look at their size and contents
-def checklookup(orig, self, files):
+def checklookup(orig, self, files, mtime_boundary):
     repo = self._repo
     if isenabled(repo):
         prefetchfiles = []
@@ -530,7 +530,7 @@
                     prefetchfiles.append((f, hex(parent.filenode(f))))
         # batch fetch the needed files from the server
         repo.fileservice.prefetch(prefetchfiles)
-    return orig(self, files)
+    return orig(self, files, mtime_boundary)
 
 
 # Prefetch the logic that compares added and removed files for renames
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -197,7 +197,7 @@
                     match._files = [f for f in match._files if sfindirstate(f)]
                     # Don't waste time getting the ignored and unknown
                     # files from lfdirstate
-                    unsure, s = lfdirstate.status(
+                    unsure, s, mtime_boundary = lfdirstate.status(
                         match,
                         subrepos=[],
                         ignored=False,
@@ -230,6 +230,10 @@
                                 size = s.st_size
                                 mtime = timestamp.mtime_of(s)
                                 cache_data = (mode, size, mtime)
+                                # We should consider using the mtime_boundary
+                                # logic here, but largefile never actually had
+                                # ambiguity protection before, so this confuse
+                                # the tests and need more thinking.
                                 lfdirstate.set_clean(lfile, cache_data)
                     else:
                         tocheck = unsure + modified + added + clean
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -1519,7 +1519,7 @@
         return orig(repo, matcher, prefix, uipathfn, opts)
     # Get the list of missing largefiles so we can remove them
     lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
-    unsure, s = lfdirstate.status(
+    unsure, s, mtime_boundary = lfdirstate.status(
         matchmod.always(),
         subrepos=[],
         ignored=False,
@@ -1746,7 +1746,7 @@
         # (*1) deprecated, but used internally (e.g: "rebase --collapse")
 
         lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
-        unsure, s = lfdirstate.status(
+        unsure, s, mtime_boundary = lfdirstate.status(
             matchmod.always(),
             subrepos=[],
             ignored=False,
diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -244,7 +244,7 @@
 def lfdirstatestatus(lfdirstate, repo):
     pctx = repo[b'.']
     match = matchmod.always()
-    unsure, s = lfdirstate.status(
+    unsure, s, mtime_boundary = lfdirstate.status(
         match, subrepos=[], ignored=False, clean=False, unknown=False
     )
     modified, clean = s.modified, s.clean
@@ -263,6 +263,10 @@
             size = st.st_size
             mtime = timestamp.mtime_of(st)
             cache_data = (mode, size, mtime)
+            # We should consider using the mtime_boundary
+            # logic here, but largefile never actually had
+            # ambiguity protection before, so this confuse
+            # the tests and need more thinking.
             lfdirstate.set_clean(lfile, cache_data)
     return s
 
@@ -670,7 +674,7 @@
         # large.
         lfdirstate = openlfdirstate(ui, repo)
         dirtymatch = matchmod.always()
-        unsure, s = lfdirstate.status(
+        unsure, s, mtime_boundary = lfdirstate.status(
             dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
         )
         modifiedfiles = unsure + s.modified + s.added + s.removed



To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211130/820be764/attachment-0002.html>


More information about the Mercurial-patches mailing list