[Updated] D11965: rhg: desambiguate status without decompressing filelog if possible
SimonSapin
phabricator at mercurial-scm.org
Fri Jan 7 09:54:36 UTC 2022
Closed by commit rHG340a484b65a7: rhg: desambiguate status without decompressing filelog if possible (authored by SimonSapin).
This revision was automatically updated to reflect the committed changes.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D11965?vs=31605&id=31616
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D11965/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D11965
AFFECTED FILES
mercurial/revlogutils/flagutil.py
rust/hg-core/src/revlog/filelog.rs
rust/hg-core/src/revlog/index.rs
rust/hg-core/src/revlog/revlog.rs
rust/rhg/src/commands/status.rs
CHANGE DETAILS
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -516,16 +516,16 @@
filelog.entry_for_node(entry.node_id()?).map_err(|_| {
HgError::corrupted("filelog missing node from manifest")
})?;
- // TODO: check `fs_len` here like below, but based on
- // `RevlogEntry::uncompressed_len` without decompressing the full filelog
- // contents where possible. This is only valid if the revlog data does not
- // contain metadata. See how Python’s `revlog.rawsize` calls
- // `storageutil.filerevisioncopied`.
- // (Maybe also check for content-modifying flags? See `revlog.size`.)
- let filelog_data = filelog_entry.data()?;
- let contents_in_p1 = filelog_data.file_data()?;
- if contents_in_p1.len() as u64 != fs_len {
- // No need to read the file contents:
+ if filelog_entry.file_data_len_not_equal_to(fs_len) {
+ // No need to read file contents:
+ // it cannot be equal if it has a different length.
+ return Ok(true);
+ }
+
+ let p1_filelog_data = filelog_entry.data()?;
+ let p1_contents = p1_filelog_data.file_data()?;
+ if p1_contents.len() as u64 != fs_len {
+ // No need to read file contents:
// it cannot be equal if it has a different length.
return Ok(true);
}
@@ -535,5 +535,5 @@
} else {
vfs.read(fs_path)?
};
- Ok(contents_in_p1 != &*fs_contents)
+ Ok(p1_contents != &*fs_contents)
}
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -20,6 +20,18 @@
use crate::revlog::Revision;
use crate::{Node, NULL_REVISION};
+const REVISION_FLAG_CENSORED: u16 = 1 << 15;
+const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
+const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
+const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
+
+// Keep this in sync with REVIDX_KNOWN_FLAGS in
+// mercurial/revlogutils/flagutil.py
+const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
+ | REVISION_FLAG_ELLIPSIS
+ | REVISION_FLAG_EXTSTORED
+ | REVISION_FLAG_HASCOPIESINFO;
+
#[derive(derive_more::From)]
pub enum RevlogError {
InvalidRevision,
@@ -282,6 +294,7 @@
},
p1: index_entry.p1(),
p2: index_entry.p2(),
+ flags: index_entry.flags(),
hash: *index_entry.hash(),
};
Ok(entry)
@@ -309,6 +322,7 @@
base_rev_or_base_of_delta_chain: Option<Revision>,
p1: Revision,
p2: Revision,
+ flags: u16,
hash: Node,
}
@@ -321,6 +335,20 @@
u32::try_from(self.uncompressed_len).ok()
}
+ pub fn has_p1(&self) -> bool {
+ self.p1 != NULL_REVISION
+ }
+
+ pub fn is_cencored(&self) -> bool {
+ (self.flags & REVISION_FLAG_CENSORED) != 0
+ }
+
+ pub fn has_length_affecting_flag_processor(&self) -> bool {
+ // Relevant Python code: revlog.size()
+ // note: ELLIPSIS is known to not change the content
+ (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
+ }
+
/// The data for this entry, after resolving deltas if any.
pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
let mut entry = self.clone();
diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs
+++ b/rust/hg-core/src/revlog/index.rs
@@ -260,6 +260,10 @@
}
}
+ pub fn flags(&self) -> u16 {
+ BigEndian::read_u16(&self.bytes[6..=7])
+ }
+
/// Return the compressed length of the data.
pub fn compressed_len(&self) -> u32 {
BigEndian::read_u32(&self.bytes[8..=11])
diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs
--- a/rust/hg-core/src/revlog/filelog.rs
+++ b/rust/hg-core/src/revlog/filelog.rs
@@ -73,6 +73,89 @@
pub struct FilelogEntry<'a>(RevlogEntry<'a>);
impl FilelogEntry<'_> {
+ /// `self.data()` can be expensive, with decompression and delta
+ /// resolution.
+ ///
+ /// *Without* paying this cost, based on revlog index information
+ /// including `RevlogEntry::uncompressed_len`:
+ ///
+ /// * Returns `true` if the length that `self.data().file_data().len()`
+ /// would return is definitely **not equal** to `other_len`.
+ /// * Returns `false` if available information is inconclusive.
+ pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
+ // Relevant code that implement this behavior in Python code:
+ // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
+ // revlog.size, revlog.rawsize
+
+ // Let’s call `file_data_len` what would be returned by
+ // `self.data().file_data().len()`.
+
+ if self.0.is_cencored() {
+ let file_data_len = 0;
+ return other_len != file_data_len;
+ }
+
+ if self.0.has_length_affecting_flag_processor() {
+ // We can’t conclude anything about `file_data_len`.
+ return false;
+ }
+
+ // Revlog revisions (usually) have metadata for the size of
+ // their data after decompression and delta resolution
+ // as would be returned by `Revlog::get_rev_data`.
+ //
+ // For filelogs this is the file’s contents preceded by an optional
+ // metadata block.
+ let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
+ l as u64
+ } else {
+ // The field was set to -1, the actual uncompressed len is unknown.
+ // We need to decompress to say more.
+ return false;
+ };
+ // `uncompressed_len = file_data_len + optional_metadata_len`,
+ // so `file_data_len <= uncompressed_len`.
+ if uncompressed_len < other_len {
+ // Transitively, `file_data_len < other_len`.
+ // So `other_len != file_data_len` definitely.
+ return true;
+ }
+
+ if uncompressed_len == other_len + 4 {
+ // It’s possible that `file_data_len == other_len` with an empty
+ // metadata block (2 start marker bytes + 2 end marker bytes).
+ // This happens when there wouldn’t otherwise be metadata, but
+ // the first 2 bytes of file data happen to match a start marker
+ // and would be ambiguous.
+ return false;
+ }
+
+ if !self.0.has_p1() {
+ // There may or may not be copy metadata, so we can’t deduce more
+ // about `file_data_len` without computing file data.
+ return false;
+ }
+
+ // Filelog ancestry is not meaningful in the way changelog ancestry is.
+ // It only provides hints to delta generation.
+ // p1 and p2 are set to null when making a copy or rename since
+ // contents are likely unrelatedto what might have previously existed
+ // at the destination path.
+ //
+ // Conversely, since here p1 is non-null, there is no copy metadata.
+ // Note that this reasoning may be invalidated in the presence of
+ // merges made by some previous versions of Mercurial that
+ // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
+ // and `tests/test-issue6528.t`.
+ //
+ // Since copy metadata is currently the only kind of metadata
+ // kept in revlog data of filelogs,
+ // this `FilelogEntry` does not have such metadata:
+ let file_data_len = uncompressed_len;
+
+ return file_data_len != other_len;
+ }
+
pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
Ok(FilelogRevisionData(self.0.data()?.into_owned()))
}
diff --git a/mercurial/revlogutils/flagutil.py b/mercurial/revlogutils/flagutil.py
--- a/mercurial/revlogutils/flagutil.py
+++ b/mercurial/revlogutils/flagutil.py
@@ -32,6 +32,7 @@
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
+# Keep this in sync with REVIDX_KNOWN_FLAGS in rust/hg-core/src/revlog/revlog.rs
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# Store flag processors (cf. 'addflagprocessor()' to register)
To: SimonSapin, #hg-reviewers, Alphare
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20220107/60051730/attachment-0002.html>
More information about the Mercurial-patches
mailing list