D11408: rust: Add a Filelog struct that wraps Revlog
SimonSapin
phabricator at mercurial-scm.org
Mon Sep 13 18:14:57 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
Some filelog-specific logic is moved from the `rhg cat` code into this struct,
where it can be reused more easily.
Additionally, a missing end delimiter for metadata causes an error
to be returned instead of being silently ignored.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11408
AFFECTED FILES
rust/hg-core/src/operations/cat.rs
rust/hg-core/src/repo.rs
rust/hg-core/src/revlog.rs
rust/hg-core/src/revlog/filelog.rs
rust/hg-core/src/utils.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -74,6 +74,7 @@
fn trim(&self) -> &Self;
fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
+ fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
impl SliceExt for [u8] {
@@ -134,6 +135,14 @@
let b = iter.next()?;
Some((a, b))
}
+
+ fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
+ if let Some(pos) = find_slice_in_slice(self, separator) {
+ Some((&self[..pos], &self[pos + separator.len()..]))
+ } else {
+ None
+ }
+ }
}
pub trait Escaped {
diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/revlog/filelog.rs
@@ -0,0 +1,79 @@
+use crate::errors::HgError;
+use crate::repo::Repo;
+use crate::revlog::path_encode::path_encode;
+use crate::revlog::revlog::{Revlog, RevlogError};
+use crate::revlog::NodePrefix;
+use crate::revlog::Revision;
+use crate::utils::files::get_path_from_bytes;
+use crate::utils::hg_path::HgPath;
+use crate::utils::SliceExt;
+use std::borrow::Cow;
+use std::path::PathBuf;
+
+/// A specialized `Revlog` to work with file data logs.
+pub struct Filelog {
+ /// The generic `revlog` format.
+ revlog: Revlog,
+}
+
+impl Filelog {
+ pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, RevlogError> {
+ let index_path = store_path(file_path, b".i");
+ let data_path = store_path(file_path, b".d");
+ let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
+ Ok(Self { revlog })
+ }
+
+ /// The given node ID is that of the file as found in a manifest, not of a
+ /// changeset.
+ pub fn get_node(
+ &self,
+ file_node: impl Into<NodePrefix>,
+ ) -> Result<FilelogEntry, RevlogError> {
+ let file_rev = self.revlog.get_node_rev(file_node.into())?;
+ self.get_rev(file_rev)
+ }
+
+ /// The given revision is that of the file as found in a manifest, not of a
+ /// changeset.
+ pub fn get_rev(
+ &self,
+ file_rev: Revision,
+ ) -> Result<FilelogEntry, RevlogError> {
+ let data = self.revlog.get_rev_data(file_rev)?;
+ Ok(FilelogEntry(data.into()))
+ }
+}
+
+fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
+ let encoded_bytes =
+ path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
+ get_path_from_bytes(&encoded_bytes).into()
+}
+
+pub struct FilelogEntry<'filelog>(Cow<'filelog, [u8]>);
+
+impl<'filelog> FilelogEntry<'filelog> {
+ /// Split into metadata and data
+ pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
+ const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
+
+ if let Some(rest) = self.0.drop_prefix(DELIMITER) {
+ if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
+ Ok((Some(metadata), data))
+ } else {
+ Err(HgError::corrupted(
+ "Missing metadata end delimiter in filelog entry",
+ ))
+ }
+ } else {
+ Ok((None, &self.0))
+ }
+ }
+
+ /// Returns the file contents at this revision, stripped of any metadata
+ pub fn data(&self) -> Result<&[u8], HgError> {
+ let (_metadata, data) = self.split()?;
+ Ok(data)
+ }
+}
diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs
--- a/rust/hg-core/src/revlog.rs
+++ b/rust/hg-core/src/revlog.rs
@@ -11,6 +11,7 @@
pub mod path_encode;
pub use node::{FromHexError, Node, NodePrefix};
pub mod changelog;
+pub mod filelog;
pub mod index;
pub mod manifest;
pub mod patch;
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -7,8 +7,10 @@
use crate::errors::HgResultExt;
use crate::manifest::{Manifest, Manifestlog};
use crate::requirements;
+use crate::revlog::filelog::Filelog;
use crate::revlog::revlog::RevlogError;
use crate::utils::files::get_path_from_bytes;
+use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
use crate::vfs::{is_dir, is_file, Vfs};
use crate::{exit_codes, Node};
@@ -346,6 +348,10 @@
Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
manifest.get_node(manifest_node.into())
}
+
+ pub fn filelog(&self, path: &HgPath) -> Result<Filelog, RevlogError> {
+ Filelog::open(self, path)
+ }
}
/// Lazily-initialized component of `Repo` with interior mutability
diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs
--- a/rust/hg-core/src/operations/cat.rs
+++ b/rust/hg-core/src/operations/cat.rs
@@ -5,15 +5,11 @@
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
-use std::path::PathBuf;
-
use crate::repo::Repo;
-use crate::revlog::path_encode::path_encode;
-use crate::revlog::revlog::Revlog;
use crate::revlog::revlog::RevlogError;
use crate::revlog::Node;
-use crate::utils::files::get_path_from_bytes;
-use crate::utils::hg_path::{HgPath, HgPathBuf};
+
+use crate::utils::hg_path::HgPathBuf;
pub struct CatOutput {
/// Whether any file in the manifest matched the paths given as CLI
@@ -27,8 +23,6 @@
pub node: Node,
}
-const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
-
/// Output the given revision of files
///
/// * `root`: Repository root
@@ -54,26 +48,10 @@
if cat_file.as_bytes() == manifest_file.as_bytes() {
*is_matched = true;
found_any = true;
- let index_path = store_path(manifest_file, b".i");
- let data_path = store_path(manifest_file, b".d");
-
- let file_log =
- Revlog::open(repo, &index_path, Some(&data_path))?;
+ let file_log = repo.filelog(manifest_file)?;
let file_node = Node::from_hex_for_repo(node_bytes)?;
- let file_rev = file_log.get_node_rev(file_node.into())?;
- let data = file_log.get_rev_data(file_rev)?;
- if data.starts_with(&METADATA_DELIMITER) {
- let end_delimiter_position = data
- [METADATA_DELIMITER.len()..]
- .windows(METADATA_DELIMITER.len())
- .position(|bytes| bytes == METADATA_DELIMITER);
- if let Some(position) = end_delimiter_position {
- let offset = METADATA_DELIMITER.len() * 2;
- bytes.extend(data[position + offset..].iter());
- }
- } else {
- bytes.extend(data);
- }
+ let entry = file_log.get_node(file_node)?;
+ bytes.extend(entry.data()?)
}
}
}
@@ -91,9 +69,3 @@
node,
})
}
-
-fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
- let encoded_bytes =
- path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
- get_path_from_bytes(&encoded_bytes).into()
-}
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list