[Updated] D11794: status: use filesystem time boundary to invalidate racy mtime
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Tue Nov 30 23:56:51 UTC 2021
marmoute edited the summary of this revision.
marmoute retitled this revision from "status: use filesystem time boundary to invalidate racy files" to "status: use filesystem time boundary to invalidate racy mtime".
marmoute updated this revision to Diff 31229.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D11794?vs=31116&id=31229
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D11794/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D11794
AFFECTED FILES
hgext/largefiles/lfutil.py
hgext/largefiles/overrides.py
hgext/largefiles/reposetup.py
hgext/remotefilelog/__init__.py
mercurial/context.py
mercurial/dirstate.py
mercurial/narrowspec.py
tests/test-dirstate-race.t
CHANGE DETAILS
diff --git a/tests/test-dirstate-race.t b/tests/test-dirstate-race.t
--- a/tests/test-dirstate-race.t
+++ b/tests/test-dirstate-race.t
@@ -66,11 +66,11 @@
> )
> def extsetup(ui):
> extensions.wrapfunction(context.workingctx, '_checklookup', overridechecklookup)
- > def overridechecklookup(orig, self, files):
+ > def overridechecklookup(orig, self, *args, **kwargs):
> # make an update that changes the dirstate from underneath
> self._repo.ui.system(br"sh '$TESTTMP/dirstaterace.sh'",
> cwd=self._repo.root)
- > return orig(self, files)
+ > return orig(self, *args, **kwargs)
> EOF
$ hg debugrebuilddirstate
diff --git a/mercurial/narrowspec.py b/mercurial/narrowspec.py
--- a/mercurial/narrowspec.py
+++ b/mercurial/narrowspec.py
@@ -323,7 +323,7 @@
removedmatch = matchmod.differencematcher(oldmatch, newmatch)
ds = repo.dirstate
- lookup, status = ds.status(
+ lookup, status, _mtime_boundary = ds.status(
removedmatch, subrepos=[], ignored=True, clean=True, unknown=True
)
trackeddirty = status.modified + status.added
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -1308,11 +1308,20 @@
# Some matchers have yet to be implemented
use_rust = False
+ # Get the time from the filesystem so we can disambiguate files that
+ # appear modified in the present or future.
+ try:
+ mtime_boundary = timestamp.get_fs_now(self._opener)
+ except OSError:
+ # In largefiles or readonly context
+ mtime_boundary = None
+
if use_rust:
try:
- return self._rust_status(
+ res = self._rust_status(
match, listclean, listignored, listunknown
)
+ return res + (mtime_boundary,)
except rustmod.FallbackError:
pass
@@ -1402,7 +1411,7 @@
status = scmutil.status(
modified, added, removed, deleted, unknown, ignored, clean
)
- return (lookup, status)
+ return (lookup, status, mtime_boundary)
def matches(self, match):
"""
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1796,13 +1796,14 @@
sane.append(f)
return sane
- def _checklookup(self, files):
+ def _checklookup(self, files, mtime_boundary):
# check for any possibly clean files
if not files:
- return [], [], []
+ return [], [], [], []
modified = []
deleted = []
+ clean = []
fixup = []
pctx = self._parents[0]
# do a full compare of any files that might have changed
@@ -1816,22 +1817,35 @@
or pctx[f].cmp(self[f])
):
modified.append(f)
+ elif mtime_boundary is None:
+ clean.append(f)
else:
- # XXX note that we have a race windows here since we gather
- # the stats after we compared so the file might have
- # changed.
- #
- # However this have always been the case because and the
- # refactoring moving the code here is improving the
- # situation by narrowing the race and moving the two steps
- # (comparison + stat) in the same location.
- #
- # Making this code "correct" is now possible.
s = self[f].lstat()
mode = s.st_mode
size = s.st_size
- mtime = timestamp.mtime_of(s)
- fixup.append((f, (mode, size, mtime)))
+ file_mtime = timestamp.mtime_of(s)
+ cache_info = (mode, size, file_mtime)
+
+ file_second = file_mtime[0]
+ boundary_second = mtime_boundary[0]
+ # If the mtime of the ambiguous file is younger (or equal)
+ # to the starting point of the `status` walk, we cannot
+ # garantee that another, racy, write will not happen right
+ # after with the same mtime and we cannot cache the
+ # information.
+ #
+ # However is the mtime is far away in the future, this is
+ # likely some mismatch between the current clock and
+ # previous file system operation. So mtime more than one days
+ # in the future are considered fine.
+ if (
+ boundary_second
+ <= file_second
+ < (3600 * 24 + boundary_second)
+ ):
+ clean.append(f)
+ else:
+ fixup.append((f, cache_info))
except (IOError, OSError):
# A file become inaccessible in between? Mark it as deleted,
# matching dirstate behavior (issue5584).
@@ -1841,7 +1855,7 @@
# it's in the dirstate.
deleted.append(f)
- return modified, deleted, fixup
+ return modified, deleted, clean, fixup
def _poststatusfixup(self, status, fixup):
"""update dirstate for files that are actually clean"""
@@ -1895,17 +1909,21 @@
subrepos = []
if b'.hgsub' in self:
subrepos = sorted(self.substate)
- cmp, s = self._repo.dirstate.status(
+ cmp, s, mtime_boundary = self._repo.dirstate.status(
match, subrepos, ignored=ignored, clean=clean, unknown=unknown
)
# check for any possibly clean files
fixup = []
if cmp:
- modified2, deleted2, fixup = self._checklookup(cmp)
+ modified2, deleted2, clean_set, fixup = self._checklookup(
+ cmp, mtime_boundary
+ )
s.modified.extend(modified2)
s.deleted.extend(deleted2)
+ if clean_set and clean:
+ s.clean.extend(clean_set)
if fixup and clean:
s.clean.extend((f for f, _ in fixup))
diff --git a/hgext/remotefilelog/__init__.py b/hgext/remotefilelog/__init__.py
--- a/hgext/remotefilelog/__init__.py
+++ b/hgext/remotefilelog/__init__.py
@@ -520,7 +520,7 @@
# Prefetch files before status attempts to look at their size and contents
-def checklookup(orig, self, files):
+def checklookup(orig, self, files, mtime_boundary):
repo = self._repo
if isenabled(repo):
prefetchfiles = []
@@ -530,7 +530,7 @@
prefetchfiles.append((f, hex(parent.filenode(f))))
# batch fetch the needed files from the server
repo.fileservice.prefetch(prefetchfiles)
- return orig(self, files)
+ return orig(self, files, mtime_boundary)
# Prefetch the logic that compares added and removed files for renames
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -197,7 +197,7 @@
match._files = [f for f in match._files if sfindirstate(f)]
# Don't waste time getting the ignored and unknown
# files from lfdirstate
- unsure, s = lfdirstate.status(
+ unsure, s, mtime_boundary = lfdirstate.status(
match,
subrepos=[],
ignored=False,
@@ -230,6 +230,10 @@
size = s.st_size
mtime = timestamp.mtime_of(s)
cache_data = (mode, size, mtime)
+ # We should consider using the mtime_boundary
+ # logic here, but largefile never actually had
+ # ambiguity protection before, so this confuse
+ # the tests and need more thinking.
lfdirstate.set_clean(lfile, cache_data)
else:
tocheck = unsure + modified + added + clean
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -1519,7 +1519,7 @@
return orig(repo, matcher, prefix, uipathfn, opts)
# Get the list of missing largefiles so we can remove them
lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
- unsure, s = lfdirstate.status(
+ unsure, s, mtime_boundary = lfdirstate.status(
matchmod.always(),
subrepos=[],
ignored=False,
@@ -1746,7 +1746,7 @@
# (*1) deprecated, but used internally (e.g: "rebase --collapse")
lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
- unsure, s = lfdirstate.status(
+ unsure, s, mtime_boundary = lfdirstate.status(
matchmod.always(),
subrepos=[],
ignored=False,
diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -244,7 +244,7 @@
def lfdirstatestatus(lfdirstate, repo):
pctx = repo[b'.']
match = matchmod.always()
- unsure, s = lfdirstate.status(
+ unsure, s, mtime_boundary = lfdirstate.status(
match, subrepos=[], ignored=False, clean=False, unknown=False
)
modified, clean = s.modified, s.clean
@@ -263,6 +263,10 @@
size = st.st_size
mtime = timestamp.mtime_of(st)
cache_data = (mode, size, mtime)
+ # We should consider using the mtime_boundary
+ # logic here, but largefile never actually had
+ # ambiguity protection before, so this confuse
+ # the tests and need more thinking.
lfdirstate.set_clean(lfile, cache_data)
return s
@@ -670,7 +674,7 @@
# large.
lfdirstate = openlfdirstate(ui, repo)
dirtymatch = matchmod.always()
- unsure, s = lfdirstate.status(
+ unsure, s, mtime_boundary = lfdirstate.status(
dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
)
modifiedfiles = unsure + s.modified + s.added + s.removed
To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211130/820be764/attachment-0002.html>
More information about the Mercurial-patches
mailing list