[Request] [++- ] D11839: rhg: Add Repo::write_dirstate

SimonSapin phabricator at mercurial-scm.org
Thu Dec 2 16:49:40 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This method is not used yet. It saves to disk any mutation that was made to
  the `Repo`’s dirstate through `Repo::dirstate_map_mut`. It takes care of
  dirstate-v1 vs. dirstate-v2, dockets, data files, appending when possible,
  etc.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11839

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-core/src/repo.rs
  rust/hg-core/src/revlog/node.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -222,7 +222,7 @@
         match result {
             Ok((packed, tree_metadata, append)) => {
                 let packed = PyBytes::new(py, &packed);
-                let tree_metadata = PyBytes::new(py, &tree_metadata);
+                let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
                 let tuple = (packed, tree_metadata, append);
                 Ok(tuple.to_py_object(py).into_object())
             },
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
--- a/rust/hg-core/src/revlog/node.rs
+++ b/rust/hg-core/src/revlog/node.rs
@@ -174,6 +174,12 @@
             data: self.data,
         }
     }
+
+    pub fn pad_to_256_bits(&self) -> [u8; 32] {
+        let mut bits = [0; 32];
+        bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
+        bits
+    }
 }
 
 /// The beginning of a binary revision SHA.
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -2,9 +2,10 @@
 use crate::config::{Config, ConfigError, ConfigParseError};
 use crate::dirstate::DirstateParents;
 use crate::dirstate_tree::dirstate_map::DirstateMap;
+use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
 use crate::dirstate_tree::owning::OwningDirstateMap;
-use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::errors::{HgError, IoResultExt};
 use crate::exit_codes;
 use crate::lock::{try_with_lock_no_wait, LockError};
 use crate::manifest::{Manifest, Manifestlog};
@@ -16,8 +17,13 @@
 use crate::vfs::{is_dir, is_file, Vfs};
 use crate::{requirements, NodePrefix};
 use crate::{DirstateError, Revision};
+use rand::Rng;
 use std::cell::{Ref, RefCell, RefMut};
 use std::collections::HashSet;
+use std::fmt::Write;
+use std::io::Seek;
+use std::io::SeekFrom;
+use std::io::Write as IoWrite;
 use std::path::{Path, PathBuf};
 
 /// A repository on disk
@@ -408,6 +414,79 @@
     pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
         Filelog::open(self, path)
     }
+
+    /// Write to disk any updates that were made through `dirstate_map_mut`.
+    ///
+    /// The "wlock" must be held while calling this.
+    /// See for example `try_with_wlock_no_wait`.
+    ///
+    /// TODO: have a `WritableRepo` type only accessible while holding the
+    /// lock?
+    pub fn write_dirstate(&self) -> Result<(), DirstateError> {
+        let map = self.dirstate_map()?;
+        // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
+        // it’s unset
+        let parents = self.dirstate_parents()?;
+        let packed_dirstate = if self.has_dirstate_v2() {
+            let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
+            let mut uuid = uuid.as_ref();
+            let can_append = uuid.is_some();
+            let (data, tree_metadata, append) = map.pack_v2(can_append)?;
+            if !append {
+                uuid = None
+            }
+            let uuid = if let Some(uuid) = uuid {
+                std::str::from_utf8(uuid)
+                    .map_err(|_| {
+                        HgError::corrupted("non-UTF-8 dirstate data file ID")
+                    })?
+                    .to_owned()
+            } else {
+                const ID_LENGTH: usize = 8;
+                let mut id = String::with_capacity(ID_LENGTH);
+                let mut rng = rand::thread_rng();
+                for _ in 0..ID_LENGTH {
+                    // One random hexadecimal digit.
+                    // `unwrap` never panics because `impl Write for String`
+                    // never returns an error.
+                    write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
+                }
+                id
+            };
+            let data_filename = format!("dirstate.{}", uuid);
+            let data_filename = self.hg_vfs().join(data_filename);
+            let mut options = std::fs::OpenOptions::new();
+            if append {
+                options.append(true);
+            } else {
+                options.write(true).create_new(true);
+            }
+            let data_size = (|| {
+                // TODO: loop and try another random ID if !append and this
+                // returns `ErrorKind::AlreadyExists`? Collision chance of two
+                // random IDs is one in 2**32
+                let mut file = options.open(&data_filename)?;
+                file.write_all(&data)?;
+                file.flush()?;
+                // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
+                file.seek(SeekFrom::Current(0))
+            })()
+            .when_writing_file(&data_filename)?;
+            DirstateDocket::serialize(
+                parents,
+                tree_metadata,
+                data_size,
+                uuid.as_bytes(),
+            )
+            .map_err(|_: std::num::TryFromIntError| {
+                HgError::corrupted("overflow in dirstate docket serialization")
+            })?
+        } else {
+            map.pack_v1(parents)?
+        };
+        self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
+        Ok(())
+    }
 }
 
 /// Lazily-initialized component of `Repo` with interior mutability
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -68,7 +68,7 @@
 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
 #[derive(BytesCast)]
 #[repr(C)]
-struct TreeMetadata {
+pub struct TreeMetadata {
     root_nodes: ChildNodes,
     nodes_with_entry_count: Size,
     nodes_with_copy_source_count: Size,
@@ -186,7 +186,34 @@
     }
 }
 
+impl TreeMetadata {
+    pub fn as_bytes(&self) -> &[u8] {
+        BytesCast::as_bytes(self)
+    }
+}
+
 impl<'on_disk> Docket<'on_disk> {
+    pub fn serialize(
+        parents: DirstateParents,
+        tree_metadata: TreeMetadata,
+        data_size: u64,
+        uuid: &[u8],
+    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
+        let header = DocketHeader {
+            marker: *V2_FORMAT_MARKER,
+            parent_1: parents.p1.pad_to_256_bits(),
+            parent_2: parents.p2.pad_to_256_bits(),
+            metadata: tree_metadata,
+            data_size: u32::try_from(data_size)?.into(),
+            uuid_size: uuid.len().try_into()?,
+        };
+        let header = header.as_bytes();
+        let mut docket = Vec::with_capacity(header.len() + uuid.len());
+        docket.extend_from_slice(header);
+        docket.extend_from_slice(uuid);
+        Ok(docket)
+    }
+
     pub fn parents(&self) -> DirstateParents {
         use crate::Node;
         let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
@@ -551,7 +578,7 @@
 pub(super) fn write(
     dirstate_map: &DirstateMap,
     can_append: bool,
-) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
     let append = can_append && dirstate_map.write_should_append();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -577,7 +604,7 @@
         unused: [0; 4],
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    Ok((writer.out, meta.as_bytes().to_vec(), append))
+    Ok((writer.out, meta, append))
 }
 
 struct Writer<'dmap, 'on_disk> {
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -951,7 +951,7 @@
     pub fn pack_v2(
         &self,
         can_append: bool,
-    ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
         let map = self.get_map();
         on_disk::write(map, can_append)
     }



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20211202/1ef140a1/attachment-0001.html>


More information about the Mercurial-patches mailing list