[Request] [+-- ] D11772: rhg: Also parse flags in the manifest parser

SimonSapin phabricator at mercurial-scm.org
Tue Nov 23 19:11:44 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11772

AFFECTED FILES
  rust/hg-core/src/operations/cat.rs
  rust/hg-core/src/operations/list_tracked_files.rs
  rust/hg-core/src/revlog/manifest.rs
  rust/rhg/src/commands/status.rs

CHANGE DETAILS

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -309,13 +309,14 @@
     manifest: &Manifest,
     hg_path: &HgPath,
 ) -> Result<bool, HgError> {
-    let file_node = manifest
+    let entry = manifest
         .find_file(hg_path)?
         .expect("ambgious file not in p1");
     let filelog = repo.filelog(hg_path)?;
-    let filelog_entry = filelog.data_for_node(file_node).map_err(|_| {
-        HgError::corrupted("filelog missing node from manifest")
-    })?;
+    let filelog_entry =
+        filelog.data_for_node(entry.node_id()?).map_err(|_| {
+            HgError::corrupted("filelog missing node from manifest")
+        })?;
     let contents_in_p1 = filelog_entry.data()?;
 
     let fs_path = hg_path_to_os_string(hg_path).expect("HgPath conversion");
diff --git a/rust/hg-core/src/revlog/manifest.rs b/rust/hg-core/src/revlog/manifest.rs
--- a/rust/hg-core/src/revlog/manifest.rs
+++ b/rust/hg-core/src/revlog/manifest.rs
@@ -4,6 +4,7 @@
 use crate::revlog::Revision;
 use crate::revlog::{Node, NodePrefix};
 use crate::utils::hg_path::HgPath;
+use crate::utils::SliceExt;
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifestlog {
@@ -55,50 +56,64 @@
 }
 
 impl Manifest {
-    /// Return an iterator over the lines of the entry.
-    pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
         self.bytes
             .split(|b| b == &b'\n')
             .filter(|line| !line.is_empty())
-    }
-
-    /// Return an iterator over the files of the entry.
-    pub fn files(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
-        self.lines().filter(|line| !line.is_empty()).map(|line| {
-            let pos =
-                line.iter().position(|x| x == &b'\0').ok_or_else(|| {
+            .map(|line| {
+                let (path, rest) = line.split_2(b'\0').ok_or_else(|| {
                     HgError::corrupted("manifest line should contain \\0")
                 })?;
-            Ok(HgPath::new(&line[..pos]))
-        })
-    }
-
-    /// Return an iterator over the files of the entry.
-    pub fn files_with_nodes(
-        &self,
-    ) -> impl Iterator<Item = Result<(&HgPath, &[u8]), HgError>> {
-        self.lines().filter(|line| !line.is_empty()).map(|line| {
-            let pos =
-                line.iter().position(|x| x == &b'\0').ok_or_else(|| {
-                    HgError::corrupted("manifest line should contain \\0")
-                })?;
-            let hash_start = pos + 1;
-            let hash_end = hash_start + 40;
-            Ok((HgPath::new(&line[..pos]), &line[hash_start..hash_end]))
-        })
+                let path = HgPath::new(path);
+                let (hex_node_id, flags) = match rest.split_last() {
+                    Some((&b'x', rest)) => (rest, Some(b'x')),
+                    Some((&b'l', rest)) => (rest, Some(b'l')),
+                    Some((&b't', rest)) => (rest, Some(b't')),
+                    _ => (rest, None),
+                };
+                Ok(ManifestEntry {
+                    path,
+                    hex_node_id,
+                    flags,
+                })
+            })
     }
 
     /// If the given path is in this manifest, return its filelog node ID
-    pub fn find_file(&self, path: &HgPath) -> Result<Option<Node>, HgError> {
+    pub fn find_file(
+        &self,
+        path: &HgPath,
+    ) -> Result<Option<ManifestEntry>, HgError> {
         // TODO: use binary search instead of linear scan. This may involve
         // building (and caching) an index of the byte indicex of each manifest
         // line.
-        for entry in self.files_with_nodes() {
-            let (manifest_path, node) = entry?;
-            if manifest_path == path {
-                return Ok(Some(Node::from_hex_for_repo(node)?));
+
+        // TODO: use try_find when available (if still using linear scan)
+        // https://github.com/rust-lang/rust/issues/63178
+        for entry in self.iter() {
+            let entry = entry?;
+            if entry.path == path {
+                return Ok(Some(entry));
             }
         }
         Ok(None)
     }
 }
+
+/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
+#[derive(Debug)]
+pub struct ManifestEntry<'manifest> {
+    pub path: &'manifest HgPath,
+    pub hex_node_id: &'manifest [u8],
+
+    /// `Some` values are b'x', b'l', or 't'
+    pub flags: Option<u8>,
+}
+
+impl ManifestEntry<'_> {
+    pub fn node_id(&self) -> Result<Node, HgError> {
+        Node::from_hex_for_repo(self.hex_node_id)
+    }
+}
diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs
--- a/rust/hg-core/src/operations/list_tracked_files.rs
+++ b/rust/hg-core/src/operations/list_tracked_files.rs
@@ -77,6 +77,6 @@
 
 impl FilesForRev {
     pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
-        self.0.files()
+        self.0.iter().map(|entry| Ok(entry?.path))
     }
 }
diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs
--- a/rust/hg-core/src/operations/cat.rs
+++ b/rust/hg-core/src/operations/cat.rs
@@ -12,6 +12,8 @@
 use crate::utils::hg_path::HgPath;
 
 use crate::errors::HgError;
+use crate::manifest::Manifest;
+use crate::manifest::ManifestEntry;
 use itertools::put_back;
 use itertools::PutBack;
 use std::cmp::Ordering;
@@ -29,39 +31,33 @@
 }
 
 // Find an item in an iterator over a sorted collection.
-fn find_item<'a, D, I: Iterator<Item = Result<(&'a HgPath, D), HgError>>>(
-    i: &mut PutBack<I>,
+fn find_item<'a>(
+    i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
     needle: &HgPath,
-) -> Result<Option<D>, HgError> {
+) -> Result<Option<Node>, HgError> {
     loop {
         match i.next() {
             None => return Ok(None),
             Some(result) => {
-                let (path, value) = result?;
-                match needle.as_bytes().cmp(path.as_bytes()) {
+                let entry = result?;
+                match needle.as_bytes().cmp(entry.path.as_bytes()) {
                     Ordering::Less => {
-                        i.put_back(Ok((path, value)));
+                        i.put_back(Ok(entry));
                         return Ok(None);
                     }
                     Ordering::Greater => continue,
-                    Ordering::Equal => return Ok(Some(value)),
+                    Ordering::Equal => return Ok(Some(entry.node_id()?)),
                 }
             }
         }
     }
 }
 
-fn find_files_in_manifest<
-    'manifest,
-    'query,
-    Data,
-    Manifest: Iterator<Item = Result<(&'manifest HgPath, Data), HgError>>,
-    Query: Iterator<Item = &'query HgPath>,
->(
-    manifest: Manifest,
-    query: Query,
-) -> Result<(Vec<(&'query HgPath, Data)>, Vec<&'query HgPath>), HgError> {
-    let mut manifest = put_back(manifest);
+fn find_files_in_manifest<'query>(
+    manifest: &Manifest,
+    query: impl Iterator<Item = &'query HgPath>,
+) -> Result<(Vec<(&'query HgPath, Node)>, Vec<&'query HgPath>), HgError> {
+    let mut manifest = put_back(manifest.iter());
     let mut res = vec![];
     let mut missing = vec![];
 
@@ -96,14 +92,13 @@
     files.sort_unstable();
 
     let (found, missing) = find_files_in_manifest(
-        manifest.files_with_nodes(),
+        &manifest,
         files.into_iter().map(|f| f.as_ref()),
     )?;
 
-    for (file_path, node_bytes) in found {
+    for (file_path, file_node) in found {
         found_any = true;
         let file_log = repo.filelog(file_path)?;
-        let file_node = Node::from_hex_for_repo(node_bytes)?;
         results.push((
             file_path,
             file_log.data_for_node(file_node)?.into_data()?,



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20211123/f4863a7c/attachment-0001.html>


More information about the Mercurial-patches mailing list