[Updated] [+++- ] D12439: rhg: start parsing changeset data
martinvonz (Martin von Zweigbergk)
phabricator at mercurial-scm.org
Fri Apr 8 22:10:13 UTC 2022
martinvonz updated this revision to Diff 32988.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D12439?vs=32867&id=32988
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D12439/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D12439
AFFECTED FILES
rust/hg-core/src/revlog/changelog.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -3,6 +3,10 @@
use crate::revlog::revlog::{Revlog, RevlogError};
use crate::revlog::Revision;
use crate::revlog::{Node, NodePrefix};
+use crate::utils::hg_path::HgPath;
+use itertools::Itertools;
+use std::ascii::escape_default;
+use std::fmt::{Debug, Formatter};
/// A specialized `Revlog` to work with `changelog` data format.
pub struct Changelog {
@@ -35,7 +39,12 @@
if bytes.is_empty() {
Ok(ChangelogRevisionData::null())
} else {
- Ok(ChangelogRevisionData::new(bytes))
+ Ok(ChangelogRevisionData::new(bytes).ok_or_else(|| {
+ RevlogError::Other(HgError::CorruptedRepository(format!(
+ "Invalid changelog data for revision {}",
+ rev
+ )))
+ })?)
}
}
@@ -45,21 +54,60 @@
}
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
-#[derive(Debug)]
+#[derive(PartialEq)]
pub struct ChangelogRevisionData {
/// The data bytes of the `changelog` entry.
bytes: Vec<u8>,
+ /// The end offset for the hex manifest (not including the newline)
+ manifest_end: usize,
+ /// The end offset for the user+email (not including the newline)
+ user_end: usize,
+ /// The end offset for the timestamp+timezone+extras (not including the
+ /// newline)
+ timestamp_end: usize,
+ /// The end offset for the file list (not including the newline)
+ files_end: usize,
}
impl ChangelogRevisionData {
- fn new(bytes: Vec<u8>) -> Self {
- Self { bytes }
+ /// Parses `bytes` as a changelog entry, recording where each section ends.
+ ///
+ /// The sections are read in this order, each terminated by a newline: the
+ /// manifest line, the user line, the timestamp line, and then the file
+ /// list (one path per line), which must be followed by an empty line.
+ /// Whatever comes after that empty line is not inspected here.
+ ///
+ /// Returns `None` when the user or timestamp line is missing, or when the
+ /// file list is not followed by an empty line.
+ fn new(bytes: Vec<u8>) -> Option<Self> {
+ let mut line_iter = bytes.split(|b| b == &b'\n');
+ // `split` always yields at least one (possibly empty) slice, so this
+ // `unwrap` cannot fail.
+ let manifest_end = line_iter.next().unwrap().len();
+ let user_slice = line_iter.next()?;
+ let user_end = manifest_end + 1 + user_slice.len();
+ let timestamp_slice = line_iter.next()?;
+ let timestamp_end = user_end + 1 + timestamp_slice.len();
+ // Inside the loop, `files_end` is the offset at which the line about to
+ // be read starts.
+ let mut files_end = timestamp_end + 1;
+ loop {
+ // This line intentionally returns `None` if the list does not end
+ // in a newline
+ let line = line_iter.next()?;
+ if line.is_empty() {
+ if files_end == bytes.len() {
+ // The list of files ended with a single newline (there
+ // should be two)
+ return None;
+ }
+ // Step back onto the newline that ends the previous line, so that
+ // the stored offset excludes it. When no file is listed this
+ // leaves `files_end == timestamp_end`.
+ files_end -= 1;
+ break;
+ }
+ files_end += line.len() + 1;
+ }
+
+ Some(Self {
+ bytes,
+ manifest_end,
+ user_end,
+ timestamp_end,
+ files_end,
+ })
}
fn null() -> Self {
Self::new(
b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
)
+ .unwrap()
}
/// Return an iterator over the lines of the entry.
@@ -70,7 +118,130 @@
/// Return the node id of the `manifest` referenced by this `changelog`
/// entry.
pub fn manifest_node(&self) -> Result<Node, HgError> {
- let manifest_node_hex = self.lines().next().unwrap();
+ let manifest_node_hex = &self.bytes[..self.manifest_end];
Node::from_hex_for_repo(manifest_node_hex)
}
+
+ /// The full user string (usually a name followed by an email enclosed in
+ /// angle brackets)
+ pub fn user(&self) -> &[u8] {
+ &self.bytes[self.manifest_end + 1..self.user_end]
+ }
+
+ /// The full timestamp line (timestamp in seconds, offset in seconds, and
+ /// possibly extras)
+ // TODO: We should expose this in a more useful way
+ pub fn timestamp_line(&self) -> &[u8] {
+ &self.bytes[self.user_end + 1..self.timestamp_end]
+ }
+
+    /// The files changed in this revision.
+    ///
+    /// Yields nothing when the entry lists no files.
+    pub fn files(&self) -> impl Iterator<Item = &HgPath> {
+        // An entry without files (such as the one built by `null()`) has
+        // `files_end == timestamp_end`, which makes the range
+        // `timestamp_end + 1..files_end` inverted. `get` returns `None` for
+        // such a range, where direct indexing would panic.
+        self.bytes
+            .get(self.timestamp_end + 1..self.files_end)
+            .into_iter()
+            .flat_map(|list| list.split(|b| b == &b'\n'))
+            .map(|path| HgPath::new(path))
+    }
+
+ /// The change description.
+ pub fn description(&self) -> &[u8] {
+ &self.bytes[self.files_end + 2..]
+ }
}
+
+impl Debug for ChangelogRevisionData {
+    /// Formats the raw bytes and each parsed section of the entry, with
+    /// every byte escaped by `debug_bytes`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // An entry without files has `files_end == timestamp_end`, so the
+        // range `timestamp_end + 1..files_end` is inverted. `get` returns
+        // `None` for it (shown here as an empty list), where direct indexing
+        // would panic.
+        let files = self
+            .bytes
+            .get(self.timestamp_end + 1..self.files_end)
+            .unwrap_or_default();
+        f.debug_struct("ChangelogRevisionData")
+            .field("bytes", &debug_bytes(&self.bytes))
+            .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
+            .field(
+                "user",
+                &debug_bytes(
+                    &self.bytes[self.manifest_end + 1..self.user_end],
+                ),
+            )
+            .field(
+                "timestamp",
+                &debug_bytes(
+                    &self.bytes[self.user_end + 1..self.timestamp_end],
+                ),
+            )
+            .field("files", &debug_bytes(files))
+            .field(
+                "description",
+                &debug_bytes(&self.bytes[self.files_end + 2..]),
+            )
+            .finish()
+    }
+}
+
+/// Renders `bytes` as a `String` in which every byte has been passed through
+/// `std::ascii::escape_default`.
+fn debug_bytes(bytes: &[u8]) -> String {
+    let escaped = bytes
+        .iter()
+        .flat_map(|byte| escape_default(*byte))
+        .collect_vec();
+    String::from_utf8_lossy(&escaped).into_owned()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use itertools::Itertools;
+ use pretty_assertions::assert_eq;
+
+    /// Every malformed entry below must be rejected by
+    /// `ChangelogRevisionData::new`.
+    #[test]
+    fn test_create_changelogrevisiondata_invalid() {
+        // Completely empty
+        assert_eq!(ChangelogRevisionData::new(b"".to_vec()), None);
+        // No newline after manifest
+        assert_eq!(ChangelogRevisionData::new(b"abcd".to_vec()), None);
+        // No newline after user
+        assert_eq!(ChangelogRevisionData::new(b"abcd\n".to_vec()), None);
+        // No newline after timestamp
+        assert_eq!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()), None);
+        // Missing newline after files
+        assert_eq!(
+            ChangelogRevisionData::new(b"abcd\n\n0 0\nfile1\nfile2".to_vec()),
+            None
+        );
+        // Only one newline after files
+        assert_eq!(
+            ChangelogRevisionData::new(
+                b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
+            ),
+            None
+        );
+    }
+
+ #[test]
+ fn test_create_changelogrevisiondata() {
+ let data = ChangelogRevisionData::new(
+ b"0123456789abcdef0123456789abcdef01234567
+Some One <someone at example.com>
+0 0
+file1
+file2
+
+some
+commit
+message"
+ .to_vec(),
+ )
+ .unwrap();
+ assert_eq!(
+ data.manifest_node().unwrap(),
+ Node::from_hex("0123456789abcdef0123456789abcdef01234567")
+ .unwrap()
+ );
+ assert_eq!(data.user(), b"Some One <someone at example.com>");
+ assert_eq!(data.timestamp_line(), b"0 0");
+ assert_eq!(
+ data.files().collect_vec(),
+ vec![HgPath::new("file1"), HgPath::new("file2")]
+ );
+ assert_eq!(data.description(), b"some\ncommit\nmessage");
+ }
+}
To: martinvonz, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20220408/ed3a49f6/attachment-0002.html>
More information about the Mercurial-patches
mailing list