[Updated] [+++++ ] D11481: persistent-nodemap: introduce a test to highlight possible race

marmoute (Pierre-Yves David) phabricator at mercurial-scm.org
Fri Sep 24 16:43:42 UTC 2021


marmoute updated this revision to Diff 30400.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11481?vs=30355&id=30400

BRANCH
  stable

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11481/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11481

AFFECTED FILES
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t
  tests/testlib/persistent-nodemap-race-ext.py

CHANGE DETAILS

diff --git a/tests/testlib/persistent-nodemap-race-ext.py b/tests/testlib/persistent-nodemap-race-ext.py
new file mode 100644
--- /dev/null
+++ b/tests/testlib/persistent-nodemap-race-ext.py
@@ -0,0 +1,294 @@
+"""Create the race condition for issue6554
+
+The persistent nodemap issues had an issue where a second writer could
+overwrite the data that a previous write just wrote. The would break the append
+only garantee of the persistent nodemap and could confuse reader. This
+extensions create all the necessary synchronisation point to the race condition
+to happen.
+
+It involves 3 process <LEFT> (a writer) <RIGHT> (a writer) and <READER>
+
+[1] <LEFT> take the lock and start a transaction
+[2] <LEFT> updated `00changelog.i` with the new data
+[3] <RIGHT> reads:
+    - the new changelog index `00changelog.i`
+    - the old `00changelog.n`
+[4] <LEFT> update the persistent nodemap:
+    - writing new data from the last valid offset
+    - updating the docket (00changelog.n)
+[5] <LEFT> release the lock
+[6] <RIGHT> grab the lock and run `repo.invalidate`
+[7] <READER> reads:
+    - the changelog index after <LEFT> write
+    - the nodemap docket after <LEFT> write
+[8] <RIGHT> reload the changelog since `00changelog.n` changed
+    /!\ This is the faulty part in issue 6554, the outdated docket is kept
+[9] <RIGHT> write:
+    - the changelog index (00changelog.i)
+    - the nodemap data (00changelog*.nd)
+      /!\ if the outdated docket is used, the write starts from the same ofset
+      /!\ as in [4], overwriting data that <LEFT> wrote in step [4].
+    - the nodemap docket (00changelog.n)
+[10] <READER> reads the nodemap data from `00changelog*.nd`
+     /!\ if step [9] was wrong, the data matching the docket that <READER>
+     /!\ loaded have been overwritten and the expected root-nodes is no longer
+     /!\ valid.
+"""
+
+from __future__ import print_function
+
+import os
+
+from mercurial.revlogutils.constants import KIND_CHANGELOG
+
+from mercurial import (
+    changelog,
+    encoding,
+    extensions,
+    localrepo,
+    node,
+    pycompat,
+    registrar,
+    testing,
+    util,
+)
+
+from mercurial.revlogutils import (
+    nodemap as nodemaputil,
+)
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+configitem(b'devel', b'nodemap-race.role', default=None)
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+LEFT = b'left'
+RIGHT = b'right'
+READER = b'reader'
+
+SYNC_DIR = os.path.join(encoding.environ[b'TESTTMP'], b'sync-files')
+
+# mark the end of step [1]
+FILE_LEFT_LOCKED = os.path.join(SYNC_DIR, b'left-locked')
+# mark that step [3] is ready to run.
+FILE_RIGHT_READY_TO_LOCK = os.path.join(SYNC_DIR, b'right-ready-to-lock')
+
+# mark the end of step [2]
+FILE_LEFT_CL_DATA_WRITE = os.path.join(SYNC_DIR, b'left-data')
+# mark the end of step [4]
+FILE_LEFT_CL_NODEMAP_WRITE = os.path.join(SYNC_DIR, b'left-nodemap')
+# mark the end of step [3]
+FILE_RIGHT_CL_NODEMAP_READ = os.path.join(SYNC_DIR, b'right-nodemap')
+# mark that step [9] is read to run
+FILE_RIGHT_CL_NODEMAP_PRE_WRITE = os.path.join(
+    SYNC_DIR, b'right-pre-nodemap-write'
+)
+# mark that step [9] has run.
+FILE_RIGHT_CL_NODEMAP_POST_WRITE = os.path.join(
+    SYNC_DIR, b'right-post-nodemap-write'
+)
+# mark that step [7] is ready to run
+FILE_READER_READY = os.path.join(SYNC_DIR, b'reader-ready')
+# mark that step [7] has run
+FILE_READER_READ_DOCKET = os.path.join(SYNC_DIR, b'reader-read-docket')
+
+
+def _print(*args, **kwargs):
+    print(*args, **kwargs)
+
+
+def _role(repo):
+    """find the role associated with the process"""
+    return repo.ui.config(b'devel', b'nodemap-race.role')
+
+
+def wrap_changelog_finalize(orig, cl, tr):
+    """wrap the update of `00changelog.i` during transaction finalization
+
+    This is useful for synchronisation before or after the file is updated on disk.
+    """
+    role = getattr(tr, '_race_role', None)
+    if role == RIGHT:
+        print('right ready to write, waiting for reader')
+        testing.wait_file(FILE_READER_READY)
+        testing.write_file(FILE_RIGHT_CL_NODEMAP_PRE_WRITE)
+        testing.wait_file(FILE_READER_READ_DOCKET)
+        print('right proceeding with writing its changelog index and nodemap')
+    ret = orig(cl, tr)
+    print("finalized changelog write")
+    if role == LEFT:
+        testing.write_file(FILE_LEFT_CL_DATA_WRITE)
+    return ret
+
+
+def wrap_persist_nodemap(orig, tr, revlog, *args, **kwargs):
+    """wrap the update of `00changelog.n` and `*.nd` during tr finalization
+
+    This is useful for synchronisation before or after the files are updated on
+    disk.
+    """
+    is_cl = revlog.target[0] == KIND_CHANGELOG
+    role = getattr(tr, '_race_role', None)
+    if is_cl:
+        if role == LEFT:
+            testing.wait_file(FILE_RIGHT_CL_NODEMAP_READ)
+    if is_cl:
+        print("persisting changelog nodemap")
+        print("  new data start at", revlog._nodemap_docket.data_length)
+    ret = orig(tr, revlog, *args, **kwargs)
+    if is_cl:
+        print("persisted changelog nodemap")
+        print_nodemap_details(revlog)
+        if role == LEFT:
+            testing.write_file(FILE_LEFT_CL_NODEMAP_WRITE)
+        elif role == RIGHT:
+            testing.write_file(FILE_RIGHT_CL_NODEMAP_POST_WRITE)
+    return ret
+
+
+def print_nodemap_details(cl):
+    """print relevant information about the nodemap docket currently in memory"""
+    dkt = cl._nodemap_docket
+    print('docket-details:')
+    if dkt is None:
+        print('  <no-docket>')
+        return
+    print('  uid:        ', pycompat.sysstr(dkt.uid))
+    print('  actual-tip: ', cl.tiprev())
+    print('  tip-rev:    ', dkt.tip_rev)
+    print('  data-length:', dkt.data_length)
+
+
+def wrap_persisted_data(orig, revlog):
+    """print some information about the nodemap information we just read
+
+    Used by the <READER> process only.
+    """
+    ret = orig(revlog)
+    if ret is not None:
+        docket, data = ret
+        file_path = nodemaputil._rawdata_filepath(revlog, docket)
+        file_path = revlog.opener.join(file_path)
+        file_size = os.path.getsize(file_path)
+        print('record-data-length:', docket.data_length)
+        print('actual-data-length:', len(data))
+        print('file-actual-length:', file_size)
+    return ret
+
+
+def sync_read(orig):
+    """used by <READER> to force the race window
+
+    This make sure we read the docker from <LEFT> while reading the datafile
+    after <RIGHT> write.
+    """
+    orig()
+    testing.write_file(FILE_READER_READ_DOCKET)
+    print('reader: nodemap docket read')
+    testing.wait_file(FILE_RIGHT_CL_NODEMAP_POST_WRITE)
+
+
+def uisetup(ui):
+    class RacedRepo(localrepo.localrepository):
+        def lock(self, wait=True):
+            # make sure <RIGHT> as the "Wrong" information in memory before
+            # grabbing the lock
+            newlock = self._currentlock(self._lockref) is None
+            if newlock and _role(self) == LEFT:
+                cl = self.unfiltered().changelog
+                print_nodemap_details(cl)
+            elif newlock and _role(self) == RIGHT:
+                testing.write_file(FILE_RIGHT_READY_TO_LOCK)
+                print('nodemap-race: right side start of the locking sequence')
+                testing.wait_file(FILE_LEFT_LOCKED)
+                testing.wait_file(FILE_LEFT_CL_DATA_WRITE)
+                self.invalidate(clearfilecache=True)
+                print('nodemap-race: right side reading changelog')
+                cl = self.unfiltered().changelog
+                tiprev = cl.tiprev()
+                tip = cl.node(tiprev)
+                tiprev2 = cl.rev(tip)
+                if tiprev != tiprev2:
+                    raise RuntimeError(
+                        'bad tip -round-trip %d %d' % (tiprev, tiprev2)
+                    )
+                testing.write_file(FILE_RIGHT_CL_NODEMAP_READ)
+                print('nodemap-race: right side reading of changelog is done')
+                print_nodemap_details(cl)
+                testing.wait_file(FILE_LEFT_CL_NODEMAP_WRITE)
+                print('nodemap-race: right side ready to wait for the lock')
+            ret = super(RacedRepo, self).lock(wait=wait)
+            if newlock and _role(self) == LEFT:
+                print('nodemap-race: left side locked and ready to commit')
+                testing.write_file(FILE_LEFT_LOCKED)
+                testing.wait_file(FILE_RIGHT_READY_TO_LOCK)
+                cl = self.unfiltered().changelog
+                print_nodemap_details(cl)
+            elif newlock and _role(self) == RIGHT:
+                print('nodemap-race: right side locked and ready to commit')
+                cl = self.unfiltered().changelog
+                print_nodemap_details(cl)
+            return ret
+
+        def transaction(self, *args, **kwargs):
+            # duck punch the role on the transaction to help other pieces of code
+            tr = super(RacedRepo, self).transaction(*args, **kwargs)
+            tr._race_role = _role(self)
+            return tr
+
+    localrepo.localrepository = RacedRepo
+
+    extensions.wrapfunction(
+        nodemaputil, 'persist_nodemap', wrap_persist_nodemap
+    )
+    extensions.wrapfunction(
+        changelog.changelog, '_finalize', wrap_changelog_finalize
+    )
+
+
+def reposetup(ui, repo):
+    if _role(repo) == READER:
+        extensions.wrapfunction(
+            nodemaputil, 'persisted_data', wrap_persisted_data
+        )
+        extensions.wrapfunction(nodemaputil, 'test_race_hook_1', sync_read)
+
+        class ReaderRepo(repo.__class__):
+            @util.propertycache
+            def changelog(self):
+                print('reader ready to read the changelog, waiting for right')
+                testing.write_file(FILE_READER_READY)
+                testing.wait_file(FILE_RIGHT_CL_NODEMAP_PRE_WRITE)
+                return super(ReaderRepo, self).changelog
+
+        repo.__class__ = ReaderRepo
+
+
+ at command(b'check-nodemap-race')
+def cmd_check_nodemap_race(ui, repo):
+    """Run proper <READER> access in the race Windows and check nodemap content"""
+    repo = repo.unfiltered()
+    print('reader: reading changelog')
+    cl = repo.changelog
+    print('reader: changelog read')
+    print_nodemap_details(cl)
+    tip_rev = cl.tiprev()
+    tip_node = cl.node(tip_rev)
+    print('tip-rev: ', tip_rev)
+    print('tip-node:', node.short(tip_node).decode('ascii'))
+    print('node-rev:', cl.rev(tip_node))
+    for r in cl.revs():
+        n = cl.node(r)
+        try:
+            r2 = cl.rev(n)
+        except ValueError as exc:
+            print('error while checking revision:', r)
+            print(' ', exc)
+            return 1
+        else:
+            if r2 != r:
+                print('revision %d is missing from the nodemap' % r)
+                return 1
diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -613,9 +613,207 @@
   $ hg id -r . --traceback
   90d5d3ba2fc4 tip
 
+(be a good citizen and regenerate the nodemap)
+  $ hg debugupdatecaches
+  $ hg debugnodemap --metadata
+  uid: * (glob)
+  tip-rev: 5005
+  tip-node: 90d5d3ba2fc47db50f712570487cb261a68c8ffe
+  data-length: 121088
+  data-unused: 0
+  data-unused: 0.000%
+
+Check race condition when multiple process write new data to the repository
+---------------------------------------------------------------------------
+
+In this test, we check that two writers touching the repositories will not
+overwrite each other data. This test is prompted by the existent of issue6554.
+Where a writer ended up using and outdated docket to update the repository. See
+the dedicated extension for details on the race windows and read/write schedule
+necessary to end up in this situation: testlib/persistent-nodemap-race-ext.py
+
+The issue was initially observed on a server with a high push trafic, but it
+can be reproduced using a share and two commiting process which seems simpler.
+
+The test is Rust only as the other implementation does not use the same
+read/write patterns.
+
+  $ cd ..
+
+#if rust
+
+  $ cp -R test-repo race-repo
+  $ hg share race-repo ./other-wc --config format.use-share-safe=yes
+  updating working directory
+  5001 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg debugformat -R ./race-repo | egrep 'share-safe|persistent-nodemap'
+  share-safe:         yes
+  persistent-nodemap: yes
+  $ hg debugformat -R ./other-wc/ | egrep 'share-safe|persistent-nodemap'
+  share-safe:         yes
+  persistent-nodemap: yes
+  $ hg -R ./other-wc update 'min(head())'
+  3 files updated, 0 files merged, 2 files removed, 0 files unresolved
+  $ hg -R ./race-repo debugnodemap --metadata
+  uid: 43c37dde
+  tip-rev: 5005
+  tip-node: 90d5d3ba2fc47db50f712570487cb261a68c8ffe
+  data-length: 121088
+  data-unused: 0
+  data-unused: 0.000%
+  $ hg -R ./race-repo log -G -r 'head()'
+  @  changeset:   5005:90d5d3ba2fc4
+  |  tag:         tip
+  ~  user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     a2
+  
+  o  changeset:   5001:16395c3cf7e2
+  |  user:        test
+  ~  date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     foo
+  
+  $ hg -R ./other-wc log -G -r 'head()'
+  o  changeset:   5005:90d5d3ba2fc4
+  |  tag:         tip
+  ~  user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     a2
+  
+  @  changeset:   5001:16395c3cf7e2
+  |  user:        test
+  ~  date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     foo
+  
+  $ echo left-side-race > race-repo/left-side-race
+  $ hg -R ./race-repo/ add race-repo/left-side-race
+
+  $ echo right-side-race > ./other-wc/right-side-race
+  $ hg -R ./other-wc/ add ./other-wc/right-side-race
+
+  $ mkdir sync-files
+  $ mkdir outputs
+  $ (
+  >    hg -R ./race-repo/ commit -m left-side-commit \
+  >    --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \
+  >    --config 'devel.nodemap-race.role=left';
+  >    touch sync-files/left-done
+  > ) > outputs/left.txt 2>&1 &
+  $ (
+  >    hg -R ./other-wc/ commit -m right-side-commit \
+  >    --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \
+  >    --config 'devel.nodemap-race.role=right';
+  >    touch sync-files/right-done
+  > ) > outputs/right.txt 2>&1 &
+  $ (
+  >    hg -R ./race-repo/ check-nodemap-race \
+  >    --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \
+  >    --config 'devel.nodemap-race.role=reader';
+  >    touch sync-files/reader-done
+  > ) > outputs/reader.txt 2>&1 &
+  $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/left-done
+  $ cat outputs/left.txt
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5005
+    tip-rev:     5005
+    data-length: 121088
+  nodemap-race: left side locked and ready to commit
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5005
+    tip-rev:     5005
+    data-length: 121088
+  finalized changelog write
+  persisting changelog nodemap
+    new data start at 121088
+  persisted changelog nodemap
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5006
+    tip-rev:     5006
+    data-length: 121280
+  $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/right-done
+  $ cat outputs/right.txt
+  nodemap-race: right side start of the locking sequence
+  nodemap-race: right side reading changelog
+  nodemap-race: right side reading of changelog is done
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5006
+    tip-rev:     5005
+    data-length: 121088
+  nodemap-race: right side ready to wait for the lock
+  nodemap-race: right side locked and ready to commit
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5006
+    tip-rev:     5005
+    data-length: 121088
+  right ready to write, waiting for reader
+  right proceeding with writing its changelog index and nodemap
+  finalized changelog write
+  persisting changelog nodemap
+    new data start at 121088
+  persisted changelog nodemap
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5007
+    tip-rev:     5007
+    data-length: 121472
+  $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/reader-done
+  $ cat outputs/reader.txt
+  reader: reading changelog
+  reader ready to read the changelog, waiting for right
+  reader: nodemap docket read
+  record-data-length: 121280
+  actual-data-length: 121280
+  file-actual-length: 121472
+  reader: changelog read
+  docket-details:
+    uid:         43c37dde
+    actual-tip:  5006
+    tip-rev:     5006
+    data-length: 121280
+  tip-rev:  5006
+  tip-node: 492901161367
+  node-rev: 5006
+  error while checking revision: 18 (known-bad-output !)
+    Inconsistency: Revision 5007 found in nodemap is not in revlog indexi (known-bad-output !)
+
+  $ hg -R ./race-repo log -G -r 'head()'
+  o  changeset:   5007:ac4a2abde241
+  |  tag:         tip
+  ~  parent:      5001:16395c3cf7e2
+     user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     right-side-commit
+  
+  @  changeset:   5006:492901161367
+  |  user:        test
+  ~  date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     left-side-commit
+  
+  $ hg -R ./other-wc log -G -r 'head()'
+  @  changeset:   5007:ac4a2abde241
+  |  tag:         tip
+  ~  parent:      5001:16395c3cf7e2
+     user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     right-side-commit
+  
+  o  changeset:   5006:492901161367
+  |  user:        test
+  ~  date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     left-side-commit
+  
+#endif
+
 Test upgrade / downgrade
 ========================
 
+  $ cd ./test-repo/
+
 downgrading
 
   $ cat << EOF >> .hg/hgrc
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -26,6 +26,14 @@
         raise error.RevlogError(b'unknown node: %s' % x)
 
 
+def test_race_hook_1():
+    """hook point for test
+
+    This let tests to have things happens between the docket reading and the
+    data reading"""
+    pass
+
+
 def persisted_data(revlog):
     """read the nodemap for a revlog from disk"""
     if revlog._nodemap_file is None:
@@ -50,6 +58,8 @@
 
     filename = _rawdata_filepath(revlog, docket)
     use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
+
+    test_race_hook_1()
     try:
         with revlog.opener(filename) as fd:
             if use_mmap:



To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210924/d88e070d/attachment-0002.html>


More information about the Mercurial-patches mailing list