[Commented On] D9493: copies-rust: tokenize all paths into integer

baymax (Baymax, Your Personal Patch-care Companion) phabricator at mercurial-scm.org
Mon Dec 14 12:59:44 UTC 2020


baymax added a comment.
baymax updated this revision to Diff 24245.


  ✅ refresh by Heptapod after a successful CI run (🐙 💚)

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D9493?vs=24216&id=24245

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D9493/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D9493

AFFECTED FILES
  rust/hg-core/src/copy_tracing.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs
+++ b/rust/hg-core/src/copy_tracing.rs
@@ -12,9 +12,9 @@
 
 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
 
-type PathToken = HgPathBuf;
+type PathToken = usize;
 
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq, Copy)]
 struct TimeStampedPathCopy {
     /// revision at which the copy information was added
     rev: Revision,
@@ -314,6 +314,37 @@
     SecondParent,
 }
 
+/// A small "tokenizer" responsible of turning full HgPath into lighter
+/// PathToken
+///
+/// Dealing with small object, like integer is much faster, so HgPath input are
+/// turned into integer "PathToken" and converted back in the end.
+#[derive(Clone, Debug, Default)]
+struct TwoWayPathMap {
+    token: HashMap<HgPathBuf, PathToken>,
+    path: Vec<HgPathBuf>,
+}
+
+impl TwoWayPathMap {
+    fn tokenize(&mut self, path: &HgPath) -> PathToken {
+        match self.token.get(path) {
+            Some(a) => *a,
+            None => {
+                let a = self.token.len();
+                let buf = path.to_owned();
+                self.path.push(buf.clone());
+                self.token.insert(buf, a);
+                a
+            }
+        }
+    }
+
+    fn untokenize(&self, token: PathToken) -> &HgPathBuf {
+        assert!(token < self.path.len(), format!("Unknown token: {}", token));
+        &self.path[token]
+    }
+}
+
 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
 ///
 /// Arguments are:
@@ -337,6 +368,8 @@
     let mut all_copies = HashMap::new();
     let mut oracle = AncestorOracle::new(is_ancestor);
 
+    let mut path_map = TwoWayPathMap::default();
+
     for rev in revs {
         let mut d: DataHolder<D> = DataHolder { data: None };
         let (p1, p2, changes) = rev_info(rev, &mut d);
@@ -355,6 +388,7 @@
             if let Some(parent_copies) = parent_copies {
                 // combine it with data for that revision
                 let vertex_copies = add_from_changes(
+                    &mut path_map,
                     &parent_copies,
                     &changes,
                     Parent::FirstParent,
@@ -374,6 +408,7 @@
             if let Some(parent_copies) = parent_copies {
                 // combine it with data for that revision
                 let vertex_copies = add_from_changes(
+                    &mut path_map,
                     &parent_copies,
                     &changes,
                     Parent::SecondParent,
@@ -388,6 +423,7 @@
                     // If we got data from both parents, We need to combine
                     // them.
                     Some(copies) => Some(merge_copies_dict(
+                        &path_map,
                         vertex_copies,
                         copies,
                         &changes,
@@ -412,7 +448,9 @@
     let mut result = PathCopies::default();
     for (dest, tt_source) in tt_result {
         if let Some(path) = tt_source.path {
-            result.insert(dest, path);
+            let path_dest = path_map.untokenize(dest).to_owned();
+            let path_path = path_map.untokenize(path).to_owned();
+            result.insert(path_dest, path_path);
         }
     }
     result
@@ -447,6 +485,7 @@
 /// Combine ChangedFiles with some existing PathCopies information and return
 /// the result
 fn add_from_changes(
+    path_map: &mut TwoWayPathMap,
     base_copies: &TimeStampedPathCopies,
     changes: &ChangedFiles,
     parent: Parent,
@@ -455,9 +494,11 @@
     let mut copies = base_copies.clone();
     for action in changes.iter_actions(parent) {
         match action {
-            Action::Copied(dest, source) => {
+            Action::Copied(path_dest, path_source) => {
+                let dest = path_map.tokenize(path_dest);
+                let source = path_map.tokenize(path_source);
                 let entry;
-                if let Some(v) = base_copies.get(source) {
+                if let Some(v) = base_copies.get(&source) {
                     entry = match &v.path {
                         Some(path) => Some((*(path)).to_owned()),
                         None => Some(source.to_owned()),
@@ -475,18 +516,19 @@
                 };
                 copies.insert(dest.to_owned(), ttpc);
             }
-            Action::Removed(f) => {
+            Action::Removed(deleted_path) => {
                 // We must drop copy information for removed file.
                 //
                 // We need to explicitly record them as dropped to
                 // propagate this information when merging two
                 // TimeStampedPathCopies object.
-                if copies.contains_key(f.as_ref()) {
+                let deleted = path_map.tokenize(deleted_path);
+                if copies.contains_key(&deleted) {
                     let ttpc = TimeStampedPathCopy {
                         rev: current_rev,
                         path: None,
                     };
-                    copies.insert(f.to_owned(), ttpc);
+                    copies.insert(deleted, ttpc);
                 }
             }
         }
@@ -499,6 +541,7 @@
 /// In case of conflict, value from "major" will be picked, unless in some
 /// cases. See inline documentation for details.
 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
+    path_map: &TwoWayPathMap,
     mut minor: TimeStampedPathCopies,
     mut major: TimeStampedPathCopies,
     changes: &ChangedFiles,
@@ -511,7 +554,9 @@
         |dest: &PathToken,
          src_minor: &TimeStampedPathCopy,
          src_major: &TimeStampedPathCopy| {
-            compare_value(changes, oracle, dest, src_minor, src_major)
+            compare_value(
+                path_map, changes, oracle, dest, src_minor, src_major,
+            )
         };
     if minor.is_empty() {
         major
@@ -641,6 +686,7 @@
 /// decide which side prevails in case of conflicting values
 #[allow(clippy::if_same_then_else)]
 fn compare_value<A: Fn(Revision, Revision) -> bool>(
+    path_map: &TwoWayPathMap,
     changes: &ChangedFiles,
     oracle: &mut AncestorOracle<A>,
     dest: &PathToken,
@@ -664,7 +710,8 @@
             "conflict information from p1 and p2 in the same revision"
         );
     } else {
-        let action = changes.get_merge_case(&dest);
+        let dest_path = path_map.untokenize(*dest);
+        let action = changes.get_merge_case(dest_path);
         if src_major.path.is_none() && action == MergeCase::Salvaged {
             // If the file is "deleted" in the major side but was
             // salvaged by the merge, we keep the minor side alive



To: marmoute, #hg-reviewers
Cc: Alphare, SimonSapin, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20201214/ead0779b/attachment-0002.html>


More information about the Mercurial-patches mailing list