[Updated] D11411: rhg: Reuse manifest when checking status of multiple ambiguous files

SimonSapin phabricator at mercurial-scm.org
Tue Sep 14 08:21:55 UTC 2021


Closed by commit rHG796206e74b10: rhg: Reuse manifest when checking status of multiple ambiguous files (authored by SimonSapin).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11411?vs=30229&id=30239

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11411/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11411

AFFECTED FILES
  rust/hg-core/src/operations/cat.rs
  rust/hg-core/src/operations/list_tracked_files.rs
  rust/hg-core/src/repo.rs
  rust/hg-core/src/revlog/changelog.rs
  rust/hg-core/src/revlog/manifest.rs
  rust/rhg/src/commands/status.rs

CHANGE DETAILS

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -10,13 +10,11 @@
 use clap::{Arg, SubCommand};
 use hg;
 use hg::dirstate_tree::dispatch::DirstateMapMethods;
-use hg::errors::IoResultExt;
+use hg::errors::{HgError, IoResultExt};
+use hg::manifest::Manifest;
 use hg::matchers::AlwaysMatcher;
-use hg::operations::cat;
 use hg::repo::Repo;
-use hg::revlog::node::Node;
 use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
-use hg::StatusError;
 use hg::{HgPathCow, StatusOptions};
 use log::{info, warn};
 use std::convert::TryInto;
@@ -203,10 +201,12 @@
     if !ds_status.unsure.is_empty()
         && (display_states.modified || display_states.clean)
     {
-        let p1: Node = repo.dirstate_parents()?.p1.into();
-        let p1_hex = format!("{:x}", p1);
+        let p1 = repo.dirstate_parents()?.p1;
+        let manifest = repo.manifest_for_node(p1).map_err(|e| {
+            CommandError::from((e, &*format!("{:x}", p1.short())))
+        })?;
         for to_check in ds_status.unsure {
-            if cat_file_is_modified(repo, &to_check, &p1_hex)? {
+            if cat_file_is_modified(repo, &manifest, &to_check)? {
                 if display_states.modified {
                     ds_status.modified.push(to_check);
                 }
@@ -267,21 +267,22 @@
 /// TODO: detect permission bits and similar metadata modifications
 fn cat_file_is_modified(
     repo: &Repo,
+    manifest: &Manifest,
     hg_path: &HgPath,
-    rev: &str,
-) -> Result<bool, CommandError> {
-    // TODO CatRev expects &[HgPathBuf], something like
-    // &[impl Deref<HgPath>] would be nicer and should avoid the copy
-    let path_bufs = [hg_path.into()];
-    // TODO IIUC CatRev returns a simple Vec<u8> for all files
-    //      being able to tell them apart as (path, bytes) would be nicer
-    //      and OPTIM would allow manifest resolution just once.
-    let output = cat(repo, rev, &path_bufs).map_err(|e| (e, rev))?;
+) -> Result<bool, HgError> {
+    let file_node = manifest
+        .find_file(hg_path)?
+        .expect("ambgious file not in p1");
+    let filelog = repo.filelog(hg_path)?;
+    let filelog_entry = filelog.get_node(file_node).map_err(|_| {
+        HgError::corrupted("filelog missing node from manifest")
+    })?;
+    let contents_in_p1 = filelog_entry.data()?;
 
     let fs_path = repo
         .working_directory_vfs()
         .join(hg_path_to_os_string(hg_path).expect("HgPath conversion"));
-    let hg_data_len: u64 = match output.concatenated.len().try_into() {
+    let hg_data_len: u64 = match contents_in_p1.len().try_into() {
         Ok(v) => v,
         Err(_) => {
             // conversion of data length to u64 failed,
@@ -290,14 +291,12 @@
         }
     };
     let fobj = fs::File::open(&fs_path).when_reading_file(&fs_path)?;
-    if fobj.metadata().map_err(|e| StatusError::from(e))?.len() != hg_data_len
-    {
+    if fobj.metadata().when_reading_file(&fs_path)?.len() != hg_data_len {
         return Ok(true);
     }
-    for (fs_byte, hg_byte) in
-        BufReader::new(fobj).bytes().zip(output.concatenated)
+    for (fs_byte, &hg_byte) in BufReader::new(fobj).bytes().zip(contents_in_p1)
     {
-        if fs_byte.map_err(|e| StatusError::from(e))? != hg_byte {
+        if fs_byte.when_reading_file(&fs_path)? != hg_byte {
             return Ok(true);
         }
     }
diff --git a/rust/hg-core/src/revlog/manifest.rs b/rust/hg-core/src/revlog/manifest.rs
--- a/rust/hg-core/src/revlog/manifest.rs
+++ b/rust/hg-core/src/revlog/manifest.rs
@@ -1,8 +1,8 @@
 use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
-use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
+use crate::revlog::{Node, NodePrefix};
 use crate::utils::hg_path::HgPath;
 
 /// A specialized `Revlog` to work with `manifest` data format.
@@ -68,4 +68,17 @@
             (HgPath::new(&line[..pos]), &line[hash_start..hash_end])
         })
     }
+
+    /// If the given path is in this manifest, return its filelog node ID
+    pub fn find_file(&self, path: &HgPath) -> Result<Option<Node>, HgError> {
+        // TODO: use binary search instead of linear scan. This may involve
+        // building (and caching) an index of the byte indicex of each manifest
+        // line.
+        for (manifest_path, node) in self.files_with_nodes() {
+            if manifest_path == path {
+                return Ok(Some(Node::from_hex_for_repo(node)?));
+            }
+        }
+        Ok(None)
+    }
 }
diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -57,9 +57,11 @@
 
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
-    pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
-        self.lines()
-            .next()
-            .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
+    pub fn manifest_node(&self) -> Result<Node, HgError> {
+        Node::from_hex_for_repo(
+            self.lines()
+                .next()
+                .ok_or_else(|| HgError::corrupted("empty changelog entry"))?,
+        )
     }
 }
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -5,15 +5,15 @@
 use crate::dirstate_tree::owning::OwningDirstateMap;
 use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::exit_codes;
 use crate::manifest::{Manifest, Manifestlog};
-use crate::requirements;
 use crate::revlog::filelog::Filelog;
 use crate::revlog::revlog::RevlogError;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::{is_dir, is_file, Vfs};
-use crate::{exit_codes, Node};
+use crate::{requirements, NodePrefix};
 use crate::{DirstateError, Revision};
 use std::cell::{Cell, Ref, RefCell, RefMut};
 use std::collections::HashSet;
@@ -336,17 +336,27 @@
         self.manifestlog.get_mut_or_init(self)
     }
 
+    /// Returns the manifest of the given node ID
+    pub fn manifest_for_node(
+        &self,
+        node: impl Into<NodePrefix>,
+    ) -> Result<Manifest, RevlogError> {
+        self.manifestlog()?.get_node(
+            self.changelog()?
+                .get_node(node.into())?
+                .manifest_node()?
+                .into(),
+        )
+    }
+
     /// Returns the manifest of the given revision
-    pub fn manifest(
+    pub fn manifest_for_rev(
         &self,
         revision: Revision,
     ) -> Result<Manifest, RevlogError> {
-        let changelog = self.changelog()?;
-        let manifest = self.manifestlog()?;
-        let changelog_entry = changelog.get_rev(revision)?;
-        let manifest_node =
-            Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
-        manifest.get_node(manifest_node.into())
+        self.manifestlog()?.get_node(
+            self.changelog()?.get_rev(revision)?.manifest_node()?.into(),
+        )
     }
 
     pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs
--- a/rust/hg-core/src/operations/list_tracked_files.rs
+++ b/rust/hg-core/src/operations/list_tracked_files.rs
@@ -70,7 +70,7 @@
     revset: &str,
 ) -> Result<FilesForRev, RevlogError> {
     let rev = crate::revset::resolve_single(revset, repo)?;
-    Ok(FilesForRev(repo.manifest(rev)?))
+    Ok(FilesForRev(repo.manifest_for_rev(rev)?))
 }
 
 pub struct FilesForRev(Manifest);
diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs
--- a/rust/hg-core/src/operations/cat.rs
+++ b/rust/hg-core/src/operations/cat.rs
@@ -34,7 +34,7 @@
     files: &'a [HgPathBuf],
 ) -> Result<CatOutput, RevlogError> {
     let rev = crate::revset::resolve_single(revset, repo)?;
-    let manifest = repo.manifest(rev)?;
+    let manifest = repo.manifest_for_rev(rev)?;
     let node = *repo
         .changelog()?
         .node_from_rev(rev)



To: SimonSapin, #hg-reviewers, Alphare
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210914/d5c2404d/attachment-0002.html>


More information about the Mercurial-patches mailing list