[Request] [+-- ] D8861: hg-core: make parse_dirstate return references rather than update hashmaps

acezar (Antoine Cezar) phabricator at mercurial-scm.org
Mon Aug 3 14:26:43 UTC 2020


acezar created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Returing a vec is faster than updating a hashmap when the hashmap is not needed
  like in `hg files` which just list tracked files.
  
  Returning references avoid copying data when not needed improving performence
  for large repositories.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8861

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -14,7 +14,7 @@
     PythonObject, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
+    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateEntry,
     DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
     PARENT_SIZE,
 };
@@ -29,11 +29,23 @@
     copymap: PyDict,
     st: PyBytes,
 ) -> PyResult<PyTuple> {
-    let mut dirstate_map = FastHashMap::default();
-    let mut copies = FastHashMap::default();
+    match parse_dirstate(st.data(py)) {
+        Ok((parents, entries, copies)) => {
+            let mut dirstate_map: FastHashMap<HgPathBuf, DirstateEntry> =
+                FastHashMap::default();
+            let mut copy_map: FastHashMap<HgPathBuf, HgPathBuf> =
+                FastHashMap::default();
+            dirstate_map.extend(
+                entries
+                    .into_iter()
+                    .map(|(path, entry)| (path.to_owned(), entry)),
+            );
+            copy_map.extend(
+                copies
+                    .into_iter()
+                    .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+            );
 
-    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
-        Ok(parents) => {
             for (filename, entry) in &dirstate_map {
                 dmap.set_item(
                     py,
@@ -41,7 +53,7 @@
                     make_dirstate_tuple(py, entry)?,
                 )?;
             }
-            for (path, copy_path) in copies {
+            for (path, copy_path) in copy_map {
                 copymap.set_item(
                     py,
                     PyBytes::new(py, path.as_bytes()),
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -19,17 +19,21 @@
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
-// TODO parse/pack: is mutate-on-loop better for performance?
+type ParseResult<'a> = (
+    DirstateParents,
+    Vec<(&'a HgPath, DirstateEntry)>,
+    Vec<(&'a HgPath, &'a HgPath)>,
+);
 
 #[timed]
 pub fn parse_dirstate(
-    state_map: &mut StateMap,
-    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<DirstateParents, DirstateParseError> {
+) -> Result<ParseResult, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
+    let mut copies = vec![];
+    let mut entries = vec![];
 
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
@@ -63,24 +67,21 @@
         };
 
         if let Some(copy_path) = copy {
-            copy_map.insert(
-                HgPath::new(path).to_owned(),
-                HgPath::new(copy_path).to_owned(),
-            );
+            copies.push((HgPath::new(path), HgPath::new(copy_path)));
         };
-        state_map.insert(
-            HgPath::new(path).to_owned(),
+        entries.push((
+            HgPath::new(path),
             DirstateEntry {
                 state,
                 mode,
                 size,
                 mtime,
             },
-        );
+        ));
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok(parents)
+    Ok((parents, entries, copies))
 }
 
 /// `now` is the duration in seconds since the Unix epoch
@@ -287,12 +288,19 @@
 
         let mut new_state_map: StateMap = FastHashMap::default();
         let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        new_state_map.extend(
+            entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry)),
+        );
+        new_copy_map.extend(
+            copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+        );
+
         assert_eq!(
             (parents, state_map, copymap),
             (new_parents, new_state_map, new_copy_map)
@@ -362,12 +370,19 @@
 
         let mut new_state_map: StateMap = FastHashMap::default();
         let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        new_state_map.extend(
+            entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry)),
+        );
+        new_copy_map.extend(
+            copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+        );
+
         assert_eq!(
             (parents, state_map, copymap),
             (new_parents, new_state_map, new_copy_map)
@@ -405,12 +420,18 @@
 
         let mut new_state_map: StateMap = FastHashMap::default();
         let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        new_state_map.extend(
+            entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry)),
+        );
+        new_copy_map.extend(
+            copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+        );
 
         assert_eq!(
             (
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -364,11 +364,17 @@
             return Ok(None);
         }
 
-        let parents = parse_dirstate(
-            &mut self.state_map,
-            &mut self.copy_map,
-            file_contents,
-        )?;
+        let (parents, entries, copies) = parse_dirstate(file_contents)?;
+        self.state_map.extend(
+            entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry)),
+        );
+        self.copy_map.extend(
+            copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+        );
 
         if !self.dirty_parents {
             self.set_parents(&parents);



To: acezar, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20200803/f6513f6e/attachment-0001.html>


More information about the Mercurial-patches mailing list