D11961: rhg: Expose FilelogEntry that wraps RevlogEntry
SimonSapin
phabricator at mercurial-scm.org
Thu Jan 6 18:57:39 UTC 2022
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
This can be later extended to access metadata such as `uncompressed_len` without
necessarily resolving deltas.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11961
AFFECTED FILES
rust/hg-core/src/revlog/filelog.rs
rust/hg-core/src/revlog/revlog.rs
rust/rhg/src/commands/status.rs
CHANGE DETAILS
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -512,17 +512,18 @@
}
let filelog = repo.filelog(hg_path)?;
let fs_len = fs_metadata.len();
+ let filelog_entry =
+ filelog.entry_for_node(entry.node_id()?).map_err(|_| {
+ HgError::corrupted("filelog missing node from manifest")
+ })?;
// TODO: check `fs_len` here like below, but based on
// `RevlogEntry::uncompressed_len` without decompressing the full filelog
// contents where possible. This is only valid if the revlog data does not
// contain metadata. See how Python’s `revlog.rawsize` calls
// `storageutil.filerevisioncopied`.
// (Maybe also check for content-modifying flags? See `revlog.size`.)
- let filelog_entry =
- filelog.data_for_node(entry.node_id()?).map_err(|_| {
- HgError::corrupted("filelog missing node from manifest")
- })?;
- let contents_in_p1 = filelog_entry.file_data()?;
+ let filelog_data = filelog_entry.data()?;
+ let contents_in_p1 = filelog_data.file_data()?;
if contents_in_p1.len() as u64 != fs_len {
// No need to read the file contents:
// it cannot be equal if it has a different length.
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -39,9 +39,13 @@
}
}
+fn corrupted() -> HgError {
+ HgError::corrupted("corrupted revlog")
+}
+
impl RevlogError {
fn corrupted() -> Self {
- RevlogError::Other(HgError::corrupted("corrupted revlog"))
+ RevlogError::Other(corrupted())
}
}
@@ -191,7 +195,7 @@
if rev == NULL_REVISION {
return Ok(Cow::Borrowed(&[]));
};
- self.get_entry(rev)?.data()
+ Ok(self.get_entry(rev)?.data()?)
}
/// Check the hash of some given data against the recorded hash.
@@ -222,13 +226,13 @@
fn build_data_from_deltas(
snapshot: RevlogEntry,
deltas: &[RevlogEntry],
- ) -> Result<Vec<u8>, RevlogError> {
+ ) -> Result<Vec<u8>, HgError> {
let snapshot = snapshot.data_chunk()?;
let deltas = deltas
.iter()
.rev()
.map(RevlogEntry::data_chunk)
- .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
let patches: Vec<_> =
deltas.iter().map(|d| patch::PatchList::new(d)).collect();
let patch = patch::fold_patch_lists(&patches);
@@ -246,7 +250,10 @@
}
/// Get an entry of the revlog.
- fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
+ pub fn get_entry(
+ &self,
+ rev: Revision,
+ ) -> Result<RevlogEntry, RevlogError> {
let index_entry = self
.index
.get_entry(rev)
@@ -281,8 +288,8 @@
fn get_entry_internal(
&self,
rev: Revision,
- ) -> Result<RevlogEntry, RevlogError> {
- return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+ ) -> Result<RevlogEntry, HgError> {
+ return self.get_entry(rev).map_err(|_| corrupted());
}
}
@@ -304,7 +311,7 @@
}
/// The data for this entry, after resolving deltas if any.
- pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+ pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
let mut entry = self.clone();
let mut delta_chain = vec![];
@@ -328,7 +335,7 @@
.revlog
.index
.get_entry(self.rev)
- .ok_or(RevlogError::InvalidRevision)?;
+ .ok_or_else(corrupted)?;
let data = if delta_chain.is_empty() {
entry.data_chunk()?
@@ -344,13 +351,13 @@
) {
Ok(data)
} else {
- Err(RevlogError::corrupted())
+ Err(corrupted())
}
}
/// Extract the data contained in the entry.
/// This may be a delta. (See `is_delta`.)
- fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+ fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
if self.bytes.is_empty() {
return Ok(Cow::Borrowed(&[]));
}
@@ -365,39 +372,35 @@
// zstd data.
b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
// A proper new format should have had a repo/store requirement.
- _format_type => Err(RevlogError::corrupted()),
+ _format_type => Err(corrupted()),
}
}
- fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
+ fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
let mut decoder = ZlibDecoder::new(self.bytes);
if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
- decoder
- .read_to_end(&mut buf)
- .map_err(|_| RevlogError::corrupted())?;
+ decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
Ok(buf)
} else {
let mut buf = vec![0; self.uncompressed_len];
- decoder
- .read_exact(&mut buf)
- .map_err(|_| RevlogError::corrupted())?;
+ decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
Ok(buf)
}
}
- fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
+ fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
zstd::stream::copy_decode(self.bytes, &mut buf)
- .map_err(|_| RevlogError::corrupted())?;
+ .map_err(|_| corrupted())?;
Ok(buf)
} else {
let mut buf = vec![0; self.uncompressed_len];
let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
- .map_err(|_| RevlogError::corrupted())?;
+ .map_err(|_| corrupted())?;
if len != self.uncompressed_len {
- Err(RevlogError::corrupted())
+ Err(corrupted())
} else {
Ok(buf)
}
diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs
--- a/rust/hg-core/src/revlog/filelog.rs
+++ b/rust/hg-core/src/revlog/filelog.rs
@@ -1,6 +1,7 @@
use crate::errors::HgError;
use crate::repo::Repo;
use crate::revlog::path_encode::path_encode;
+use crate::revlog::revlog::RevlogEntry;
use crate::revlog::revlog::{Revlog, RevlogError};
use crate::revlog::NodePrefix;
use crate::revlog::Revision;
@@ -23,7 +24,7 @@
Ok(Self { revlog })
}
- /// The given node ID is that of the file as found in a manifest, not of a
+ /// The given node ID is that of the file as found in a filelog, not of a
/// changeset.
pub fn data_for_node(
&self,
@@ -33,7 +34,7 @@
self.data_for_rev(file_rev)
}
- /// The given revision is that of the file as found in a manifest, not of a
+ /// The given revision is that of the file as found in a filelog, not of a
/// changeset.
pub fn data_for_rev(
&self,
@@ -42,6 +43,25 @@
let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
Ok(FilelogRevisionData(data.into()))
}
+
+ /// The given node ID is that of the file as found in a filelog, not of a
+ /// changeset.
+ pub fn entry_for_node(
+ &self,
+ file_node: impl Into<NodePrefix>,
+ ) -> Result<FilelogEntry, RevlogError> {
+ let file_rev = self.revlog.rev_from_node(file_node.into())?;
+ self.entry_for_rev(file_rev)
+ }
+
+ /// The given revision is that of the file as found in a filelog, not of a
+ /// changeset.
+ pub fn entry_for_rev(
+ &self,
+ file_rev: Revision,
+ ) -> Result<FilelogEntry, RevlogError> {
+ Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
+ }
}
fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
@@ -50,6 +70,14 @@
get_path_from_bytes(&encoded_bytes).into()
}
+pub struct FilelogEntry<'a>(RevlogEntry<'a>);
+
+impl FilelogEntry<'_> {
+ pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
+ Ok(FilelogRevisionData(self.0.data()?.into_owned()))
+ }
+}
+
/// The data for one revision in a filelog, uncompressed and delta-resolved.
pub struct FilelogRevisionData(Vec<u8>);
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list