D11750: commit: prevent possible race that results in bad dirstate
valentin.gatienbaron (Valentin Gatien-Baron)
phabricator at mercurial-scm.org
Thu Nov 11 01:25:59 UTC 2021
valentin.gatienbaron created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
I'm getting reports of `hg status` wrongly showing files as clean, which
get fixed by running `hg debugrebuilddirstate`. I suspect the cause is the
race described below. And even if it's not, it still seems good to fix.
`hg commit` works by adding new versions of files from the working copy
into the store, then later lstat'ing the files in the working copy to
mark them as clean in the dirstate. This is racy, since the files can
be modified in the meantime.
Reduce the race by doing the lstat immediately after adding new file
versions into the store, and guarantee that the file size recorded in
the dirstate is correct (i.e. is the file size from the store).
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11750
AFFECTED FILES
hgext/eol.py
hgext/keyword.py
hgext/largefiles/reposetup.py
hgext/lfs/__init__.py
hgext/remotefilelog/shallowrepo.py
mercurial/commit.py
mercurial/context.py
mercurial/interfaces/repository.py
mercurial/localrepo.py
tests/fakedirstatewritetime.py
tests/test-annotate.t
tests/test-commandserver.t
tests/test-dirstate-race2.t
tests/test-fastannotate-hg.t
CHANGE DETAILS
diff --git a/tests/test-fastannotate-hg.t b/tests/test-fastannotate-hg.t
--- a/tests/test-fastannotate-hg.t
+++ b/tests/test-fastannotate-hg.t
@@ -484,7 +484,7 @@
> from __future__ import absolute_import
> from mercurial import commit, error, extensions
> def _filecommit(orig, repo, fctx, manifest1, manifest2,
- > linkrev, tr, includecopymeta, ms):
+ > linkrev, tr, includecopymeta, ms, filedata):
> fname = fctx.path()
> text = fctx.data()
> flog = repo.file(fname)
diff --git a/tests/test-dirstate-race2.t b/tests/test-dirstate-race2.t
--- a/tests/test-dirstate-race2.t
+++ b/tests/test-dirstate-race2.t
@@ -54,8 +54,8 @@
$ echo a > a; hg status; hg diff
Do a commit where file 'a' is changed between hg committing its new
-revision into the repository, and the writing of the dirstate. This
-results in a corrupted dirstate (size doesn't match committed size).
+revision into the repository, and the writing of the dirstate. The
+size in the dirstate is correct nonetheless, and so a diff is shown.
$ echo aaa > a; hg commit -qm _
$ hg merge -qr 1; hg resolve -m; rm a.orig
@@ -71,9 +71,12 @@
m 0 -2 (set |unset) a (re)
$ hg commit -m _ --config extensions.race=$TESTTMP/dirstaterace.py
$ hg debugdirstate --no-dates
- n 644 0 (set |unset) a (re)
+ n 644 105 (set |unset) a (re)
$ cat a | wc -c
*0 (re)
$ hg cat -r . a | wc -c
*105 (re)
$ hg status; hg diff --stat
+ M a
+ a | 5 -----
+ 1 files changed, 0 insertions(+), 5 deletions(-)
diff --git a/tests/test-commandserver.t b/tests/test-commandserver.t
--- a/tests/test-commandserver.t
+++ b/tests/test-commandserver.t
@@ -986,13 +986,13 @@
> raise error.Abort(b'fail after finalization')
> def reposetup(ui, repo):
> class failrepo(repo.__class__):
- > def commitctx(self, ctx, error=False, origctx=None):
+ > def commitctx(self, ctx, **kwargs):
> if self.ui.configbool(b'failafterfinalize', b'fail'):
> # 'sorted()' by ASCII code on category names causes
> # invoking 'fail' after finalization of changelog
> # using "'cl-%i' % id(self)" as category name
> self.currenttransaction().addfinalize(b'zzzzzzzz', fail)
- > return super(failrepo, self).commitctx(ctx, error, origctx)
+ > return super(failrepo, self).commitctx(ctx, **kwargs)
> repo.__class__ = failrepo
> EOF
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -481,7 +481,7 @@
> from __future__ import absolute_import
> from mercurial import commit, error, extensions
> def _filecommit(orig, repo, fctx, manifest1, manifest2,
- > linkrev, tr, includecopymeta, ms):
+ > linkrev, tr, includecopymeta, ms, filedata):
> fname = fctx.path()
> text = fctx.data()
> flog = repo.file(fname)
diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -95,9 +95,9 @@
return fakewrite(ui, lambda: orig(workingctx, status, fixup))
-def markcommitted(orig, committablectx, node):
+def markcommitted(orig, committablectx, node, filedata=None):
ui = committablectx.repo().ui
- return fakewrite(ui, lambda: orig(committablectx, node))
+ return fakewrite(ui, lambda: orig(committablectx, node, filedata=filedata))
def extsetup(ui):
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -3198,10 +3198,11 @@
b"precommit", throw=True, parent1=hookp1, parent2=hookp2
)
with self.transaction(b'commit'):
- ret = self.commitctx(cctx, True)
+ filedata = {} if not cctx.isinmemory() else None
+ ret = self.commitctx(cctx, error=True, filedata=filedata)
# update bookmarks, dirstate and mergestate
bookmarks.update(self, [p1, p2], ret)
- cctx.markcommitted(ret)
+ cctx.markcommitted(ret, filedata=filedata)
ms.reset()
except: # re-raises
if edited:
@@ -3228,8 +3229,10 @@
return ret
@unfilteredmethod
- def commitctx(self, ctx, error=False, origctx=None):
- return commit.commitctx(self, ctx, error=error, origctx=origctx)
+ def commitctx(self, ctx, error=False, origctx=None, filedata=None):
+ return commit.commitctx(
+ self, ctx, error=error, origctx=origctx, filedata=filedata
+ )
@unfilteredmethod
def destroying(self):
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
--- a/mercurial/interfaces/repository.py
+++ b/mercurial/interfaces/repository.py
@@ -1835,7 +1835,7 @@
):
"""Add a new revision to the repository."""
- def commitctx(ctx, error=False, origctx=None):
+ def commitctx(ctx, error=False, origctx=None, filedata=None):
"""Commit a commitctx instance to the repository."""
def destroying():
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1511,7 +1511,7 @@
):
yield self._repo[a]
- def markcommitted(self, node):
+ def markcommitted(self, node, filedata=None):
"""Perform post-commit cleanup necessary after committing this ctx
Specifically, this updates backing stores this working context
@@ -2019,15 +2019,23 @@
ds = self._repo.dirstate
return sorted(f for f in ds.matches(match) if ds.get_entry(f).tracked)
- def markcommitted(self, node):
+ def markcommitted(self, node, filedata=None):
+ if filedata is None:
+ filedata = {}
with self._repo.dirstate.parentchange():
for f in self.modified() + self.added():
self._repo.dirstate.update_file(
- f, p1_tracked=True, wc_tracked=True
+ f,
+ p1_tracked=True,
+ wc_tracked=True,
+ parentfiledata=filedata.get(f),
)
for f in self.removed():
self._repo.dirstate.update_file(
- f, p1_tracked=False, wc_tracked=False
+ f,
+ p1_tracked=False,
+ wc_tracked=False,
+ parentfiledata=filedata.get(f),
)
self._repo.dirstate.setparents(node)
self._repo._quick_access_changeid_invalidate()
diff --git a/mercurial/commit.py b/mercurial/commit.py
--- a/mercurial/commit.py
+++ b/mercurial/commit.py
@@ -13,6 +13,7 @@
nullrev,
)
+from .dirstateutils import timestamp
from . import (
context,
mergestate,
@@ -42,7 +43,7 @@
return writechangesetcopy, writefilecopymeta
-def commitctx(repo, ctx, error=False, origctx=None):
+def commitctx(repo, ctx, error=False, origctx=None, filedata=None):
"""Add a new revision to the target repository.
Revision information is passed via the context argument.
@@ -63,7 +64,9 @@
user = ctx.user()
with repo.lock(), repo.transaction(b"commit") as tr:
- mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx)
+ mn, files = _prepare_files(
+ tr, ctx, filedata, error=error, origctx=origctx
+ )
extra = ctx.extra().copy()
@@ -123,7 +126,7 @@
return n
-def _prepare_files(tr, ctx, error=False, origctx=None):
+def _prepare_files(tr, ctx, filedata, error=False, origctx=None):
repo = ctx.repo()
p1 = ctx.p1()
@@ -146,7 +149,7 @@
repo.ui.debug(b'reusing manifest from p1 (no file change)\n')
mn = p1.manifestnode()
else:
- mn = _process_files(tr, ctx, ms, files, error=error)
+ mn = _process_files(tr, ctx, ms, files, filedata, error=error)
if origctx and origctx.manifestnode() == mn:
origfiles = origctx.files()
@@ -177,7 +180,7 @@
return salvaged
-def _process_files(tr, ctx, ms, files, error=False):
+def _process_files(tr, ctx, ms, files, filedata, error=False):
repo = ctx.repo()
p1 = ctx.p1()
p2 = ctx.p2()
@@ -207,7 +210,15 @@
else:
added.append(f)
m[f], is_touched = _filecommit(
- repo, fctx, m1, m2, linkrev, tr, writefilecopymeta, ms
+ repo,
+ fctx,
+ m1,
+ m2,
+ linkrev,
+ tr,
+ writefilecopymeta,
+ ms,
+ filedata,
)
if is_touched:
if is_touched == 'added':
@@ -244,6 +255,22 @@
return mn
+def storefiledata(repo, filedata, fctx, size):
+ if (
+ not repo._encodefilterpats
+ and not repo._decodefilterpats
+ and filedata is not None
+ ):
+ # if there are encode or decode filters, size in store is not
+ # the same as size in dirstate, so this code wouldn't work the
+ # way it is currently written
+ s = fctx.lstat()
+ mode = s.st_mode
+ mtime = timestamp.mtime_of(s)
+ # for dirstate.update_file's parentfiledata argument:
+ filedata[fctx.path()] = (mode, size(), mtime)
+
+
def _filecommit(
repo,
fctx,
@@ -253,6 +280,7 @@
tr,
includecopymeta,
ms,
+ filedata,
):
"""
commit an individual file as part of a larger transaction
@@ -297,6 +325,7 @@
and manifest2.flags(fname) != fctx.flags()
):
touched = 'modified'
+ storefiledata(repo, filedata, fctx, fctx.size)
return node, touched
flog = repo.file(fname)
@@ -406,6 +435,7 @@
fnode = fparent1
else:
fnode = fparent1
+ storefiledata(repo, filedata, fctx, lambda: len(text))
return fnode, touched
diff --git a/hgext/remotefilelog/shallowrepo.py b/hgext/remotefilelog/shallowrepo.py
--- a/hgext/remotefilelog/shallowrepo.py
+++ b/hgext/remotefilelog/shallowrepo.py
@@ -193,7 +193,7 @@
)
@localrepo.unfilteredmethod
- def commitctx(self, ctx, error=False, origctx=None):
+ def commitctx(self, ctx, error=False, origctx=None, filedata=None):
"""Add a new revision to current repository.
Revision information is passed via the context argument.
"""
@@ -211,7 +211,7 @@
files.append((f, hex(fparent1)))
self.fileservice.prefetch(files)
return super(shallowrepository, self).commitctx(
- ctx, error=error, origctx=origctx
+ ctx, error=error, origctx=origctx, filedata=filedata
)
def backgroundprefetch(
diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -248,9 +248,11 @@
class lfsrepo(repo.__class__):
@localrepo.unfilteredmethod
- def commitctx(self, ctx, error=False, origctx=None):
+ def commitctx(self, ctx, error=False, origctx=None, filedata=None):
repo.svfs.options[b'lfstrack'] = _trackedmatcher(self)
- return super(lfsrepo, self).commitctx(ctx, error, origctx=origctx)
+ return super(lfsrepo, self).commitctx(
+ ctx, error, origctx=origctx, filedata=filedata
+ )
repo.__class__ = lfsrepo
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -319,7 +319,7 @@
node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
class lfilesctx(ctx.__class__):
- def markcommitted(self, node):
+ def markcommitted(self, node, filedata=None):
orig = super(lfilesctx, self).markcommitted
return lfutil.markcommitted(orig, self, node)
diff --git a/hgext/keyword.py b/hgext/keyword.py
--- a/hgext/keyword.py
+++ b/hgext/keyword.py
@@ -856,8 +856,10 @@
finally:
del self.commitctx
- def kwcommitctx(self, ctx, error=False, origctx=None):
- n = super(kwrepo, self).commitctx(ctx, error, origctx)
+ def kwcommitctx(self, ctx, error=False, origctx=None, filedata=None):
+ n = super(kwrepo, self).commitctx(
+ ctx, error, origctx, filedata=None
+ )
# no lock needed, only called from repo.commit() which already locks
if not kwt.postcommit:
restrict = kwt.restrict
diff --git a/hgext/eol.py b/hgext/eol.py
--- a/hgext/eol.py
+++ b/hgext/eol.py
@@ -457,7 +457,7 @@
if wlock is not None:
wlock.release()
- def commitctx(self, ctx, error=False, origctx=None):
+ def commitctx(self, ctx, error=False, origctx=None, filedata=None):
for f in sorted(ctx.added() + ctx.modified()):
if not self._eolmatch(f):
continue
@@ -474,7 +474,7 @@
raise errormod.Abort(
_(b"inconsistent newline style in %s\n") % f
)
- return super(eolrepo, self).commitctx(ctx, error, origctx)
+ return super(eolrepo, self).commitctx(ctx, error, origctx, filedata)
repo.__class__ = eolrepo
repo._hgcleardirstate()
To: valentin.gatienbaron, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list