[Updated] D10486: dirstate-tree: Serialize to disk

SimonSapin phabricator at mercurial-scm.org
Thu May 6 16:52:28 UTC 2021


Closed by commit rHG1ae8c997b5ab: dirstate-tree: Serialize to disk (authored by SimonSapin).
SimonSapin marked an inline comment as done.
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D10486?vs=27136&id=27645

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D10486/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D10486

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/dirstate_tree/dirstate_map.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -1,11 +1,14 @@
-use std::collections::BTreeMap;
+use bytes_cast::BytesCast;
 use std::path::PathBuf;
+use std::{collections::BTreeMap, convert::TryInto};
 
 use super::path_with_basename::WithBasename;
+use crate::dirstate::parsers::clear_ambiguous_mtime;
+use crate::dirstate::parsers::pack_entry;
+use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
 use crate::dirstate::parsers::parse_dirstate_parents;
 use crate::dirstate::parsers::Timestamp;
-
 use crate::matchers::Matcher;
 use crate::revlog::node::NULL_NODE;
 use crate::utils::hg_path::{HgPath, HgPathBuf};
@@ -327,11 +330,38 @@
 
     fn pack(
         &mut self,
-        _parents: DirstateParents,
-        _now: Timestamp,
+        parents: DirstateParents,
+        now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
-        let _ = self.iter_node_data_mut();
-        todo!()
+        // Optimization (to be measured?): pre-compute size to avoid `Vec`
+        // reallocations
+        let mut size = parents.as_bytes().len();
+        for (path, node) in self.iter_nodes() {
+            if node.entry.is_some() {
+                size += packed_entry_size(
+                    path.full_path(),
+                    node.copy_source.as_ref(),
+                )
+            }
+        }
+
+        let mut packed = Vec::with_capacity(size);
+        packed.extend(parents.as_bytes());
+
+        let now: i32 = now.0.try_into().expect("time overflow");
+        for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
+            if let Some(entry) = opt_entry {
+                clear_ambiguous_mtime(entry, now);
+                pack_entry(
+                    path.full_path(),
+                    entry,
+                    copy_source.as_ref(),
+                    &mut packed,
+                );
+            }
+        }
+        self.dirty_parents = false;
+        Ok(packed)
     }
 
     fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf> {
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -4,7 +4,7 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::errors::HgError;
-use crate::utils::hg_path::HgPath;
+use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
     dirstate::{CopyMap, EntryState, RawEntry, StateMap},
     DirstateEntry, DirstateParents,
@@ -82,9 +82,71 @@
     Ok(parents)
 }
 
+fn packed_filename_and_copy_source_size(
+    filename: &HgPathBuf,
+    copy_source: Option<&HgPathBuf>,
+) -> usize {
+    filename.len()
+        + if let Some(source) = copy_source {
+            b"\0".len() + source.len()
+        } else {
+            0
+        }
+}
+
+pub fn packed_entry_size(
+    filename: &HgPathBuf,
+    copy_source: Option<&HgPathBuf>,
+) -> usize {
+    MIN_ENTRY_SIZE
+        + packed_filename_and_copy_source_size(filename, copy_source)
+}
+
+pub fn pack_entry(
+    filename: &HgPathBuf,
+    entry: &DirstateEntry,
+    copy_source: Option<&HgPathBuf>,
+    packed: &mut Vec<u8>,
+) {
+    let length = packed_filename_and_copy_source_size(filename, copy_source);
+
+    // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
+    packed.write_u8(entry.state.into()).unwrap();
+    packed.write_i32::<BigEndian>(entry.mode).unwrap();
+    packed.write_i32::<BigEndian>(entry.size).unwrap();
+    packed.write_i32::<BigEndian>(entry.mtime).unwrap();
+    packed.write_i32::<BigEndian>(length as i32).unwrap();
+    packed.extend(filename.as_bytes());
+    if let Some(source) = copy_source {
+        packed.push(b'\0');
+        packed.extend(source.as_bytes());
+    }
+}
+
 /// Seconds since the Unix epoch
 pub struct Timestamp(pub u64);
 
+pub fn clear_ambiguous_mtime(
+    entry: &mut DirstateEntry,
+    mtime_now: i32,
+) -> bool {
+    let ambiguous =
+        entry.state == EntryState::Normal && entry.mtime == mtime_now;
+    if ambiguous {
+        // The file was last modified "simultaneously" with the current
+        // write to dirstate (i.e. within the same second for file-
+        // systems with a granularity of 1 sec). This commonly happens
+        // for at least a couple of files on 'update'.
+        // The user could change the file without changing its size
+        // within the same second. Invalidate the file's mtime in
+        // dirstate, forcing future 'status' calls to compare the
+        // contents of the file if the size is the same. This prevents
+        // mistakenly treating such files as clean.
+        entry.mtime = -1;
+    }
+    ambiguous
+}
+
 pub fn pack_dirstate(
     state_map: &mut StateMap,
     copy_map: &CopyMap,
@@ -97,11 +159,7 @@
     let expected_size: usize = state_map
         .iter()
         .map(|(filename, _)| {
-            let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(copy) = copy_map.get(filename) {
-                length += copy.len() + 1;
-            }
-            length
+            packed_entry_size(filename, copy_map.get(filename))
         })
         .sum();
     let expected_size = expected_size + PARENT_SIZE * 2;
@@ -112,39 +170,8 @@
     packed.extend(parents.p2.as_bytes());
 
     for (filename, entry) in state_map.iter_mut() {
-        let new_filename = filename.to_owned();
-        let mut new_mtime: i32 = entry.mtime;
-        if entry.state == EntryState::Normal && entry.mtime == now {
-            // The file was last modified "simultaneously" with the current
-            // write to dirstate (i.e. within the same second for file-
-            // systems with a granularity of 1 sec). This commonly happens
-            // for at least a couple of files on 'update'.
-            // The user could change the file without changing its size
-            // within the same second. Invalidate the file's mtime in
-            // dirstate, forcing future 'status' calls to compare the
-            // contents of the file if the size is the same. This prevents
-            // mistakenly treating such files as clean.
-            new_mtime = -1;
-            *entry = DirstateEntry {
-                mtime: new_mtime,
-                ..*entry
-            };
-        }
-        let mut new_filename = new_filename.into_vec();
-        if let Some(copy) = copy_map.get(filename) {
-            new_filename.push(b'\0');
-            new_filename.extend(copy.bytes());
-        }
-
-        // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
-        packed.write_u8(entry.state.into()).unwrap();
-        packed.write_i32::<BigEndian>(entry.mode).unwrap();
-        packed.write_i32::<BigEndian>(entry.size).unwrap();
-        packed.write_i32::<BigEndian>(new_mtime).unwrap();
-        packed
-            .write_i32::<BigEndian>(new_filename.len() as i32)
-            .unwrap();
-        packed.extend(new_filename)
+        clear_ambiguous_mtime(entry, now);
+        pack_entry(filename, entry, copy_map.get(filename), &mut packed)
     }
 
     if packed.len() != expected_size {
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -9,7 +9,7 @@
 name = "hg"
 
 [dependencies]
-bytes-cast = "0.1"
+bytes-cast = "0.2"
 byteorder = "1.3.4"
 derive_more = "0.99"
 home = "0.5"
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -64,9 +64,9 @@
 
 [[package]]
 name = "bytes-cast"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52"
+checksum = "0d434f9a4ecbe987e7ccfda7274b6f82ea52c9b63742565a65cb5e8ba0f2c452"
 dependencies = [
  "bytes-cast-derive",
 ]



To: SimonSapin, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210506/99dd233c/attachment-0002.html>


More information about the Mercurial-patches mailing list