D11750: commit: prevent possible race that results in bad dirstate

valentin.gatienbaron (Valentin Gatien-Baron) phabricator at mercurial-scm.org
Thu Nov 11 01:25:59 UTC 2021


valentin.gatienbaron created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  I'm getting reports of `hg status` wrongly showing files as clean, which
  get fixed by running `hg debugrebuilddirstate`. I suspect the cause is the
  race described below; even if it is not, the race still seems worth fixing.
  
  `hg commit` works by adding new versions of files from the working copy
  into the store, then later lstat'ing the files in the working copy to
  mark them as clean in the dirstate. This is racy, since the files can
  be modified between those two steps.
  
  Reduce the race window by doing the lstat immediately after adding the
  new file versions into the store, and guarantee that the file size
  recorded in the dirstate is correct (i.e. it is the file size from the
  store).

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11750

AFFECTED FILES
  hgext/eol.py
  hgext/keyword.py
  hgext/largefiles/reposetup.py
  hgext/lfs/__init__.py
  hgext/remotefilelog/shallowrepo.py
  mercurial/commit.py
  mercurial/context.py
  mercurial/interfaces/repository.py
  mercurial/localrepo.py
  tests/fakedirstatewritetime.py
  tests/test-annotate.t
  tests/test-commandserver.t
  tests/test-dirstate-race2.t
  tests/test-fastannotate-hg.t

CHANGE DETAILS

diff --git a/tests/test-fastannotate-hg.t b/tests/test-fastannotate-hg.t
--- a/tests/test-fastannotate-hg.t
+++ b/tests/test-fastannotate-hg.t
@@ -484,7 +484,7 @@
   > from __future__ import absolute_import
   > from mercurial import commit, error, extensions
   > def _filecommit(orig, repo, fctx, manifest1, manifest2,
-  >                 linkrev, tr, includecopymeta, ms):
+  >                 linkrev, tr, includecopymeta, ms, filedata):
   >     fname = fctx.path()
   >     text = fctx.data()
   >     flog = repo.file(fname)
diff --git a/tests/test-dirstate-race2.t b/tests/test-dirstate-race2.t
--- a/tests/test-dirstate-race2.t
+++ b/tests/test-dirstate-race2.t
@@ -54,8 +54,8 @@
   $ echo a > a; hg status; hg diff
 
 Do a commit where file 'a' is changed between hg committing its new
-revision into the repository, and the writing of the dirstate. This
-results in a corrupted dirstate (size doesn't match committed size).
+revision into the repository, and the writing of the dirstate. The
+size in the dirstate is correct nonetheless, and so a diff is shown.
 
   $ echo aaa > a; hg commit -qm _
   $ hg merge -qr 1; hg resolve -m; rm a.orig
@@ -71,9 +71,12 @@
   m   0         -2 (set  |unset)               a (re)
   $ hg commit -m _ --config extensions.race=$TESTTMP/dirstaterace.py
   $ hg debugdirstate --no-dates
-  n 644          0 (set  |unset)               a (re)
+  n 644        105 (set  |unset)               a (re)
   $ cat a | wc -c
    *0 (re)
   $ hg cat -r . a | wc -c
    *105 (re)
   $ hg status; hg diff --stat
+  M a
+   a |  5 -----
+   1 files changed, 0 insertions(+), 5 deletions(-)
diff --git a/tests/test-commandserver.t b/tests/test-commandserver.t
--- a/tests/test-commandserver.t
+++ b/tests/test-commandserver.t
@@ -986,13 +986,13 @@
   >     raise error.Abort(b'fail after finalization')
   > def reposetup(ui, repo):
   >     class failrepo(repo.__class__):
-  >         def commitctx(self, ctx, error=False, origctx=None):
+  >         def commitctx(self, ctx, **kwargs):
   >             if self.ui.configbool(b'failafterfinalize', b'fail'):
   >                 # 'sorted()' by ASCII code on category names causes
   >                 # invoking 'fail' after finalization of changelog
   >                 # using "'cl-%i' % id(self)" as category name
   >                 self.currenttransaction().addfinalize(b'zzzzzzzz', fail)
-  >             return super(failrepo, self).commitctx(ctx, error, origctx)
+  >             return super(failrepo, self).commitctx(ctx, **kwargs)
   >     repo.__class__ = failrepo
   > EOF
 
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -481,7 +481,7 @@
   > from __future__ import absolute_import
   > from mercurial import commit, error, extensions
   > def _filecommit(orig, repo, fctx, manifest1, manifest2,
-  >                 linkrev, tr, includecopymeta, ms):
+  >                 linkrev, tr, includecopymeta, ms, filedata):
   >     fname = fctx.path()
   >     text = fctx.data()
   >     flog = repo.file(fname)
diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -95,9 +95,9 @@
     return fakewrite(ui, lambda: orig(workingctx, status, fixup))
 
 
-def markcommitted(orig, committablectx, node):
+def markcommitted(orig, committablectx, node, filedata=None):
     ui = committablectx.repo().ui
-    return fakewrite(ui, lambda: orig(committablectx, node))
+    return fakewrite(ui, lambda: orig(committablectx, node, filedata=filedata))
 
 
 def extsetup(ui):
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -3198,10 +3198,11 @@
                     b"precommit", throw=True, parent1=hookp1, parent2=hookp2
                 )
                 with self.transaction(b'commit'):
-                    ret = self.commitctx(cctx, True)
+                    filedata = {} if not cctx.isinmemory() else None
+                    ret = self.commitctx(cctx, error=True, filedata=filedata)
                     # update bookmarks, dirstate and mergestate
                     bookmarks.update(self, [p1, p2], ret)
-                    cctx.markcommitted(ret)
+                    cctx.markcommitted(ret, filedata=filedata)
                     ms.reset()
             except:  # re-raises
                 if edited:
@@ -3228,8 +3229,10 @@
         return ret
 
     @unfilteredmethod
-    def commitctx(self, ctx, error=False, origctx=None):
-        return commit.commitctx(self, ctx, error=error, origctx=origctx)
+    def commitctx(self, ctx, error=False, origctx=None, filedata=None):
+        return commit.commitctx(
+            self, ctx, error=error, origctx=origctx, filedata=filedata
+        )
 
     @unfilteredmethod
     def destroying(self):
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
--- a/mercurial/interfaces/repository.py
+++ b/mercurial/interfaces/repository.py
@@ -1835,7 +1835,7 @@
     ):
         """Add a new revision to the repository."""
 
-    def commitctx(ctx, error=False, origctx=None):
+    def commitctx(ctx, error=False, origctx=None, filedata=None):
         """Commit a commitctx instance to the repository."""
 
     def destroying():
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1511,7 +1511,7 @@
         ):
             yield self._repo[a]
 
-    def markcommitted(self, node):
+    def markcommitted(self, node, filedata=None):
         """Perform post-commit cleanup necessary after committing this ctx
 
         Specifically, this updates backing stores this working context
@@ -2019,15 +2019,23 @@
         ds = self._repo.dirstate
         return sorted(f for f in ds.matches(match) if ds.get_entry(f).tracked)
 
-    def markcommitted(self, node):
+    def markcommitted(self, node, filedata=None):
+        if filedata is None:
+            filedata = {}
         with self._repo.dirstate.parentchange():
             for f in self.modified() + self.added():
                 self._repo.dirstate.update_file(
-                    f, p1_tracked=True, wc_tracked=True
+                    f,
+                    p1_tracked=True,
+                    wc_tracked=True,
+                    parentfiledata=filedata.get(f),
                 )
             for f in self.removed():
                 self._repo.dirstate.update_file(
-                    f, p1_tracked=False, wc_tracked=False
+                    f,
+                    p1_tracked=False,
+                    wc_tracked=False,
+                    parentfiledata=filedata.get(f),
                 )
             self._repo.dirstate.setparents(node)
             self._repo._quick_access_changeid_invalidate()
diff --git a/mercurial/commit.py b/mercurial/commit.py
--- a/mercurial/commit.py
+++ b/mercurial/commit.py
@@ -13,6 +13,7 @@
     nullrev,
 )
 
+from .dirstateutils import timestamp
 from . import (
     context,
     mergestate,
@@ -42,7 +43,7 @@
     return writechangesetcopy, writefilecopymeta
 
 
-def commitctx(repo, ctx, error=False, origctx=None):
+def commitctx(repo, ctx, error=False, origctx=None, filedata=None):
     """Add a new revision to the target repository.
     Revision information is passed via the context argument.
 
@@ -63,7 +64,9 @@
     user = ctx.user()
 
     with repo.lock(), repo.transaction(b"commit") as tr:
-        mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx)
+        mn, files = _prepare_files(
+            tr, ctx, filedata, error=error, origctx=origctx
+        )
 
         extra = ctx.extra().copy()
 
@@ -123,7 +126,7 @@
         return n
 
 
-def _prepare_files(tr, ctx, error=False, origctx=None):
+def _prepare_files(tr, ctx, filedata, error=False, origctx=None):
     repo = ctx.repo()
     p1 = ctx.p1()
 
@@ -146,7 +149,7 @@
         repo.ui.debug(b'reusing manifest from p1 (no file change)\n')
         mn = p1.manifestnode()
     else:
-        mn = _process_files(tr, ctx, ms, files, error=error)
+        mn = _process_files(tr, ctx, ms, files, filedata, error=error)
 
     if origctx and origctx.manifestnode() == mn:
         origfiles = origctx.files()
@@ -177,7 +180,7 @@
     return salvaged
 
 
-def _process_files(tr, ctx, ms, files, error=False):
+def _process_files(tr, ctx, ms, files, filedata, error=False):
     repo = ctx.repo()
     p1 = ctx.p1()
     p2 = ctx.p2()
@@ -207,7 +210,15 @@
             else:
                 added.append(f)
                 m[f], is_touched = _filecommit(
-                    repo, fctx, m1, m2, linkrev, tr, writefilecopymeta, ms
+                    repo,
+                    fctx,
+                    m1,
+                    m2,
+                    linkrev,
+                    tr,
+                    writefilecopymeta,
+                    ms,
+                    filedata,
                 )
                 if is_touched:
                     if is_touched == 'added':
@@ -244,6 +255,22 @@
     return mn
 
 
+def storefiledata(repo, filedata, fctx, size):
+    if (
+        not repo._encodefilterpats
+        and not repo._decodefilterpats
+        and filedata is not None
+    ):
+        # if there are encode or decode filters, size in store is not
+        # the same as size in dirstate, so this code wouldn't work the
+        # way it is currently written
+        s = fctx.lstat()
+        mode = s.st_mode
+        mtime = timestamp.mtime_of(s)
+        # for dirstate.update_file's parentfiledata argument:
+        filedata[fctx.path()] = (mode, size(), mtime)
+
+
 def _filecommit(
     repo,
     fctx,
@@ -253,6 +280,7 @@
     tr,
     includecopymeta,
     ms,
+    filedata,
 ):
     """
     commit an individual file as part of a larger transaction
@@ -297,6 +325,7 @@
                 and manifest2.flags(fname) != fctx.flags()
             ):
                 touched = 'modified'
+            storefiledata(repo, filedata, fctx, fctx.size)
             return node, touched
 
     flog = repo.file(fname)
@@ -406,6 +435,7 @@
         fnode = fparent1
     else:
         fnode = fparent1
+    storefiledata(repo, filedata, fctx, lambda: len(text))
     return fnode, touched
 
 
diff --git a/hgext/remotefilelog/shallowrepo.py b/hgext/remotefilelog/shallowrepo.py
--- a/hgext/remotefilelog/shallowrepo.py
+++ b/hgext/remotefilelog/shallowrepo.py
@@ -193,7 +193,7 @@
                 )
 
         @localrepo.unfilteredmethod
-        def commitctx(self, ctx, error=False, origctx=None):
+        def commitctx(self, ctx, error=False, origctx=None, filedata=None):
             """Add a new revision to current repository.
             Revision information is passed via the context argument.
             """
@@ -211,7 +211,7 @@
                         files.append((f, hex(fparent1)))
                 self.fileservice.prefetch(files)
             return super(shallowrepository, self).commitctx(
-                ctx, error=error, origctx=origctx
+                ctx, error=error, origctx=origctx, filedata=filedata
             )
 
         def backgroundprefetch(
diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -248,9 +248,11 @@
 
     class lfsrepo(repo.__class__):
         @localrepo.unfilteredmethod
-        def commitctx(self, ctx, error=False, origctx=None):
+        def commitctx(self, ctx, error=False, origctx=None, filedata=None):
             repo.svfs.options[b'lfstrack'] = _trackedmatcher(self)
-            return super(lfsrepo, self).commitctx(ctx, error, origctx=origctx)
+            return super(lfsrepo, self).commitctx(
+                ctx, error, origctx=origctx, filedata=filedata
+            )
 
     repo.__class__ = lfsrepo
 
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -319,7 +319,7 @@
             node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
 
             class lfilesctx(ctx.__class__):
-                def markcommitted(self, node):
+                def markcommitted(self, node, filedata=None):
                     orig = super(lfilesctx, self).markcommitted
                     return lfutil.markcommitted(orig, self, node)
 
diff --git a/hgext/keyword.py b/hgext/keyword.py
--- a/hgext/keyword.py
+++ b/hgext/keyword.py
@@ -856,8 +856,10 @@
             finally:
                 del self.commitctx
 
-        def kwcommitctx(self, ctx, error=False, origctx=None):
-            n = super(kwrepo, self).commitctx(ctx, error, origctx)
+        def kwcommitctx(self, ctx, error=False, origctx=None, filedata=None):
+            n = super(kwrepo, self).commitctx(
+                ctx, error, origctx, filedata=None
+            )
             # no lock needed, only called from repo.commit() which already locks
             if not kwt.postcommit:
                 restrict = kwt.restrict
diff --git a/hgext/eol.py b/hgext/eol.py
--- a/hgext/eol.py
+++ b/hgext/eol.py
@@ -457,7 +457,7 @@
                     if wlock is not None:
                         wlock.release()
 
-        def commitctx(self, ctx, error=False, origctx=None):
+        def commitctx(self, ctx, error=False, origctx=None, filedata=None):
             for f in sorted(ctx.added() + ctx.modified()):
                 if not self._eolmatch(f):
                     continue
@@ -474,7 +474,7 @@
                     raise errormod.Abort(
                         _(b"inconsistent newline style in %s\n") % f
                     )
-            return super(eolrepo, self).commitctx(ctx, error, origctx)
+            return super(eolrepo, self).commitctx(ctx, error, origctx, filedata)
 
     repo.__class__ = eolrepo
     repo._hgcleardirstate()



To: valentin.gatienbaron, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list