D11411: rhg: Reuse manifest when checking status of multiple ambiguous files

SimonSapin phabricator at mercurial-scm.org
Mon Sep 13 18:15:33 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  When `rhg status` cannot determine whether a file is clean based on mtime and
  size alone, it needs to compare its contents with those found in the parent
  commit. Previously, rhg would find the (same) manifest of that commit again
  for every such file. This is lifted out of the loop and reused.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11411

AFFECTED FILES
  rust/hg-core/src/operations/cat.rs
  rust/hg-core/src/operations/list_tracked_files.rs
  rust/hg-core/src/repo.rs
  rust/hg-core/src/revlog/changelog.rs
  rust/hg-core/src/revlog/manifest.rs
  rust/rhg/src/commands/status.rs

CHANGE DETAILS

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -10,13 +10,11 @@
 use clap::{Arg, SubCommand};
 use hg;
 use hg::dirstate_tree::dispatch::DirstateMapMethods;
-use hg::errors::IoResultExt;
+use hg::errors::{HgError, IoResultExt};
+use hg::manifest::Manifest;
 use hg::matchers::AlwaysMatcher;
-use hg::operations::cat;
 use hg::repo::Repo;
-use hg::revlog::node::Node;
 use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
-use hg::StatusError;
 use hg::{HgPathCow, StatusOptions};
 use log::{info, warn};
 use std::convert::TryInto;
@@ -203,10 +201,12 @@
     if !ds_status.unsure.is_empty()
         && (display_states.modified || display_states.clean)
     {
-        let p1: Node = repo.dirstate_parents()?.p1.into();
-        let p1_hex = format!("{:x}", p1);
+        let p1 = repo.dirstate_parents()?.p1;
+        let manifest = repo.manifest_for_node(p1).map_err(|e| {
+            CommandError::from((e, &*format!("{:x}", p1.short())))
+        })?;
         for to_check in ds_status.unsure {
-            if cat_file_is_modified(repo, &to_check, &p1_hex)? {
+            if cat_file_is_modified(repo, &manifest, &to_check)? {
                 if display_states.modified {
                     ds_status.modified.push(to_check);
                 }
@@ -267,21 +267,22 @@
 /// TODO: detect permission bits and similar metadata modifications
 fn cat_file_is_modified(
     repo: &Repo,
+    manifest: &Manifest,
     hg_path: &HgPath,
-    rev: &str,
-) -> Result<bool, CommandError> {
-    // TODO CatRev expects &[HgPathBuf], something like
-    // &[impl Deref<HgPath>] would be nicer and should avoid the copy
-    let path_bufs = [hg_path.into()];
-    // TODO IIUC CatRev returns a simple Vec<u8> for all files
-    //      being able to tell them apart as (path, bytes) would be nicer
-    //      and OPTIM would allow manifest resolution just once.
-    let output = cat(repo, rev, &path_bufs).map_err(|e| (e, rev))?;
+) -> Result<bool, HgError> {
+    let file_node = manifest
+        .find_file(hg_path)?
+        .expect("ambgious file not in p1");
+    let filelog = repo.filelog(hg_path)?;
+    let filelog_entry = filelog.get_node(file_node).map_err(|_| {
+        HgError::corrupted("filelog missing node from manifest")
+    })?;
+    let contents_in_p1 = filelog_entry.data()?;
 
     let fs_path = repo
         .working_directory_vfs()
         .join(hg_path_to_os_string(hg_path).expect("HgPath conversion"));
-    let hg_data_len: u64 = match output.concatenated.len().try_into() {
+    let hg_data_len: u64 = match contents_in_p1.len().try_into() {
         Ok(v) => v,
         Err(_) => {
             // conversion of data length to u64 failed,
@@ -290,14 +291,12 @@
         }
     };
     let fobj = fs::File::open(&fs_path).when_reading_file(&fs_path)?;
-    if fobj.metadata().map_err(|e| StatusError::from(e))?.len() != hg_data_len
-    {
+    if fobj.metadata().when_reading_file(&fs_path)?.len() != hg_data_len {
         return Ok(true);
     }
-    for (fs_byte, hg_byte) in
-        BufReader::new(fobj).bytes().zip(output.concatenated)
+    for (fs_byte, &hg_byte) in BufReader::new(fobj).bytes().zip(contents_in_p1)
     {
-        if fs_byte.map_err(|e| StatusError::from(e))? != hg_byte {
+        if fs_byte.when_reading_file(&fs_path)? != hg_byte {
             return Ok(true);
         }
     }
diff --git a/rust/hg-core/src/revlog/manifest.rs b/rust/hg-core/src/revlog/manifest.rs
--- a/rust/hg-core/src/revlog/manifest.rs
+++ b/rust/hg-core/src/revlog/manifest.rs
@@ -1,8 +1,8 @@
 use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
-use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
+use crate::revlog::{Node, NodePrefix};
 use crate::utils::hg_path::HgPath;
 
 /// A specialized `Revlog` to work with `manifest` data format.
@@ -68,4 +68,17 @@
             (HgPath::new(&line[..pos]), &line[hash_start..hash_end])
         })
     }
+
+    /// If the given path is in this manifest, return its filelog node ID
+    pub fn find_file(&self, path: &HgPath) -> Result<Option<Node>, HgError> {
+        // TODO: use binary search instead of linear scan. This may involve
+        // building (and caching) an index of the byte indicex of each manifest
+        // line.
+        for (manifest_path, node) in self.files_with_nodes() {
+            if manifest_path == path {
+                return Ok(Some(Node::from_hex_for_repo(node)?));
+            }
+        }
+        Ok(None)
+    }
 }
diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -57,9 +57,11 @@
 
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
-    pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
-        self.lines()
-            .next()
-            .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
+    pub fn manifest_node(&self) -> Result<Node, HgError> {
+        Node::from_hex_for_repo(
+            self.lines()
+                .next()
+                .ok_or_else(|| HgError::corrupted("empty changelog entry"))?,
+        )
     }
 }
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -5,15 +5,15 @@
 use crate::dirstate_tree::owning::OwningDirstateMap;
 use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::exit_codes;
 use crate::manifest::{Manifest, Manifestlog};
-use crate::requirements;
 use crate::revlog::filelog::Filelog;
 use crate::revlog::revlog::RevlogError;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::{is_dir, is_file, Vfs};
-use crate::{exit_codes, Node};
+use crate::{requirements, NodePrefix};
 use crate::{DirstateError, Revision};
 use std::cell::{Cell, Ref, RefCell, RefMut};
 use std::collections::HashSet;
@@ -336,17 +336,27 @@
         self.manifestlog.get_mut_or_init(self)
     }
 
+    /// Returns the manifest of the given node ID
+    pub fn manifest_for_node(
+        &self,
+        node: impl Into<NodePrefix>,
+    ) -> Result<Manifest, RevlogError> {
+        self.manifestlog()?.get_node(
+            self.changelog()?
+                .get_node(node.into())?
+                .manifest_node()?
+                .into(),
+        )
+    }
+
     /// Returns the manifest of the given revision
-    pub fn manifest(
+    pub fn manifest_for_rev(
         &self,
         revision: Revision,
     ) -> Result<Manifest, RevlogError> {
-        let changelog = self.changelog()?;
-        let manifest = self.manifestlog()?;
-        let changelog_entry = changelog.get_rev(revision)?;
-        let manifest_node =
-            Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
-        manifest.get_node(manifest_node.into())
+        self.manifestlog()?.get_node(
+            self.changelog()?.get_rev(revision)?.manifest_node()?.into(),
+        )
     }
 
     pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs
--- a/rust/hg-core/src/operations/list_tracked_files.rs
+++ b/rust/hg-core/src/operations/list_tracked_files.rs
@@ -70,7 +70,7 @@
     revset: &str,
 ) -> Result<FilesForRev, RevlogError> {
     let rev = crate::revset::resolve_single(revset, repo)?;
-    Ok(FilesForRev(repo.manifest(rev)?))
+    Ok(FilesForRev(repo.manifest_for_rev(rev)?))
 }
 
 pub struct FilesForRev(Manifest);
diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs
--- a/rust/hg-core/src/operations/cat.rs
+++ b/rust/hg-core/src/operations/cat.rs
@@ -34,7 +34,7 @@
     files: &'a [HgPathBuf],
 ) -> Result<CatOutput, RevlogError> {
     let rev = crate::revset::resolve_single(revset, repo)?;
-    let manifest = repo.manifest(rev)?;
+    let manifest = repo.manifest_for_rev(rev)?;
     let node = *repo
         .changelog()?
         .node_from_rev(rev)



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list