[Request] [++- ] D11238: rust-nodemap: falling back to C impl as mitigation
gracinet (Georges Racinet)
phabricator at mercurial-scm.org
Mon Aug 2 10:23:07 UTC 2021
gracinet created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
This is a mitigation for https://bz.mercurial-scm.org/show_bug.cgi?id=6554
We see sometimes almost all data except the most recent revisions
removed from the persistent nodemap, but we don't know how to
reproduce yet.
This has sadly repercussions beyond just needing to reconstruct the
persistent nodemap: for instance, this automatically filters out
all bookmarks pointing to revisions that the nodemap cannot resolve.
If such filtering happens in a transaction, the update of the
bookmarks file that happens at the end of transaction loses all
bookmarks that have been affected. There may be similar consequences
for other data.
So this is a data loss, something that we have to prevent as soon as
possible.
As a mitigation measure, we will now fallback to the C implementation
in case nodemap lookups failed. This will add some latency, e.g., in
discovery, yet less than disabling the persistent nodemap entirely.
We considered implementing the fallback directly on the Python
side, but `revlog.get_rev()` is not systematically used, there are
also several direct calls to the index method (`self.index.rev()` for
a `revlog` instance). It is therefore more direct to implement the
mitigation in the rust-cpython wrapper.
REPOSITORY
rHG Mercurial
BRANCH
stable
REVISION DETAIL
https://phab.mercurial-scm.org/D11238
AFFECTED FILES
rust/hg-cpython/src/revlog.rs
tests/test-persistent-nodemap.t
CHANGE DETAILS
diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -428,6 +428,46 @@
data-length: 121088
data-unused: 0
data-unused: 0.000%
+
+Sub-case: fallback for corrupted data file
+------------------------------------------
+
+Sabotaging the data file so that nodemap resolutions fail, triggering fallback to
+(non-persistent) C implementation.
+
+
+ $ UUID=`hg debugnodemap --metadata| grep 'uid:' | \
+ > sed 's/uid: //'`
+ $ FILE=.hg/store/00changelog-"${UUID}".nd
+ $ python -c "fobj = open('$FILE', 'r+b'); fobj.write(b'\xff' * 121088); fobj.close()"
+
+The nodemap data file is still considered in sync with the docket. This
+would fail without the fallback to the (non-persistent) C implementation:
+
+ $ hg log -r b355ef8adce0949b8bdf6afc72ca853740d65944 -T '{rev}\n' --traceback
+ 5002
+
+The nodemap data file hasn't been fixed, more tests can be inserted:
+
+ $ hg debugnodemap --dump-disk | f --bytes=256 --hexdump --size
+ size=121088
+ 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0030: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0040: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0090: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00b0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00c0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+
$ mv ../tmp-data-file $FILE
$ mv ../tmp-docket .hg/store/00changelog.n
diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs
--- a/rust/hg-cpython/src/revlog.rs
+++ b/rust/hg-cpython/src/revlog.rs
@@ -59,12 +59,19 @@
/// Return Revision if found, raises a bare `error.RevlogError`
/// in case of ambiguity, same as C version does
- def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
+ def get_rev(&self, pynode: PyBytes) -> PyResult<Option<Revision>> {
let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
- let node = node_from_py_bytes(py, &node)?;
- nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
+ let node = node_from_py_bytes(py, &pynode)?;
+ match nt.find_bin(idx, node.into())
+ {
+ Ok(None) =>
+ self.call_cindex(py, "get_rev", &PyTuple::new(py, &[pynode.into_object()]), None)?
+ .extract(py),
+ Ok(Some(rev)) => Ok(Some(rev)),
+ Err(e) => Err(nodemap_error(py, e)),
+ }
}
/// same as `get_rev()` but raises a bare `error.RevlogError` if node
@@ -94,27 +101,31 @@
}
}
- def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
+ def partialmatch(&self, pynode: PyObject) -> PyResult<Option<PyBytes>> {
let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
let node_as_string = if cfg!(feature = "python3-sys") {
- node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
+ pynode.cast_as::<PyString>(py)?.to_string(py)?.to_string()
}
else {
- let node = node.extract::<PyBytes>(py)?;
+ let node = pynode.extract::<PyBytes>(py)?;
String::from_utf8_lossy(node.data(py)).to_string()
};
let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
- nt.find_bin(idx, prefix)
- // TODO make an inner API returning the node directly
- .map(|opt| opt.map(
- |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
- .map_err(|e| nodemap_error(py, e))
-
+ match nt.find_bin(idx, prefix) {
+ Ok(None) =>
+ self.call_cindex(
+ py, "partialmatch",
+ &PyTuple::new(py, &[pynode]), None
+ )?.extract(py),
+ Ok(Some(rev)) =>
+ Ok(Some(PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))),
+ Err(e) => Err(nodemap_error(py, e)),
+ }
}
/// append an index entry
To: gracinet, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20210802/4a574708/attachment-0001.html>
More information about the Mercurial-patches
mailing list