D11088: dirstate-v2: Introduce a docket file

SimonSapin phabricator at mercurial-scm.org
Mon Jul 12 15:54:46 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  .hg/dirstate now only contains some metadata to point to a separate data file
  named .hg/dirstate.{}.d, where {} is a random hexadecimal identifier. For now
  every update creates a new data file and removes the old one, but later we’ll
  (usually) append to an existing file.
  
  Separating into two files allows doing the "write to a temporary file then
  atomically rename into destination" dance with only a small docket file,
  without always rewriting a lot of data.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11088

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/dirstate.py
  mercurial/dirstatemap.py
  mercurial/dirstateutils/docket.py
  mercurial/upgrade_utils/engine.py
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/dispatch.rs
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-core/src/repo.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/dispatch.rs
  rust/hg-cpython/src/dirstate/owning.rs
  rust/rhg/src/commands/status.rs
  rust/rhg/src/error.rs
  tests/test-hgignore.t

CHANGE DETAILS

diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t
--- a/tests/test-hgignore.t
+++ b/tests/test-hgignore.t
@@ -405,20 +405,19 @@
 
 #if dirstate-v2
 
-Check the hash of ignore patterns written in the dirstate at offset
-12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
+Check the hash of ignore patterns written in the dirstate
 
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
+  $ hg debugdirstateignorepatternshash
   6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
 
   $ echo rel > .hg/testhgignorerel
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
+  $ hg debugdirstateignorepatternshash
   dea19cc7119213f24b6b582a4bae7b0cb063e34e
 
 #endif
diff --git a/rust/rhg/src/error.rs b/rust/rhg/src/error.rs
--- a/rust/rhg/src/error.rs
+++ b/rust/rhg/src/error.rs
@@ -3,6 +3,7 @@
 use crate::NoRepoInCwdError;
 use format_bytes::format_bytes;
 use hg::config::{ConfigError, ConfigParseError, ConfigValueParseError};
+use hg::dirstate_tree::on_disk::DirstateV2ParseError;
 use hg::errors::HgError;
 use hg::exit_codes;
 use hg::repo::RepoError;
@@ -199,3 +200,9 @@
         }
     }
 }
+
+impl From<DirstateV2ParseError> for CommandError {
+    fn from(error: DirstateV2ParseError) -> Self {
+        HgError::from(error).into()
+    }
+}
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -10,6 +10,7 @@
 use clap::{Arg, SubCommand};
 use hg;
 use hg::dirstate_tree::dirstate_map::DirstateMap;
+use hg::dirstate_tree::on_disk;
 use hg::errors::HgResultExt;
 use hg::errors::IoResultExt;
 use hg::matchers::AlwaysMatcher;
@@ -165,17 +166,33 @@
     };
 
     let repo = invocation.repo?;
-    let dirstate_data =
-        repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
-    let dirstate_data = match &dirstate_data {
-        Some(mmap) => &**mmap,
-        None => b"",
-    };
+    let dirstate_data_mmap;
     let (mut dmap, parents) = if repo.has_dirstate_v2() {
-        DirstateMap::new_v2(dirstate_data)?
+        let parents;
+        let dirstate_data;
+        if let Some(docket_data) =
+            repo.hg_vfs().read("dirstate").io_not_found_as_none()?
+        {
+            let docket = on_disk::read_docket(&docket_data)?;
+            parents = Some(docket.parents());
+            dirstate_data_mmap = repo
+                .hg_vfs()
+                .mmap_open(docket.data_filename())
+                .io_not_found_as_none()?;
+            dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
+        } else {
+            parents = None;
+            dirstate_data = b"";
+        }
+        let dmap = DirstateMap::new_v2(dirstate_data)?;
+        (dmap, parents)
     } else {
+        dirstate_data_mmap =
+            repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
+        let dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
         DirstateMap::new_v1(dirstate_data)?
     };
+
     let options = StatusOptions {
         // TODO should be provided by the dirstate parsing and
         // hence be stored on dmap. Using a value that assumes we aren't
diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs
--- a/rust/hg-cpython/src/dirstate/owning.rs
+++ b/rust/hg-cpython/src/dirstate/owning.rs
@@ -28,17 +28,12 @@
 }
 
 impl OwningDirstateMap {
-    pub fn new(
+    pub fn new_v1(
         py: Python,
         on_disk: PyBytes,
-        use_dirstate_v2: bool,
     ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
         let bytes: &'_ [u8] = on_disk.data(py);
-        let (map, parents) = if use_dirstate_v2 {
-            DirstateMap::new_v2(bytes)?
-        } else {
-            DirstateMap::new_v1(bytes)?
-        };
+        let (map, parents) = DirstateMap::new_v1(bytes)?;
 
         // Like in `bytes` above, this `'_` lifetime parameter borrows from
         // the bytes buffer owned by `on_disk`.
@@ -50,6 +45,23 @@
         Ok((Self { on_disk, ptr }, parents))
     }
 
+    pub fn new_v2(
+        py: Python,
+        on_disk: PyBytes,
+    ) -> Result<Self, DirstateError> {
+        let bytes: &'_ [u8] = on_disk.data(py);
+        let map = DirstateMap::new_v2(bytes)?;
+
+        // Like in `bytes` above, this `'_` lifetime parameter borrows from
+        // the bytes buffer owned by `on_disk`.
+        let ptr: *mut DirstateMap<'_> = Box::into_raw(Box::new(map));
+
+        // Erase the pointed type entirely in order to erase the lifetime.
+        let ptr: *mut () = ptr.cast();
+
+        Ok(Self { on_disk, ptr })
+    }
+
     pub fn get_mut<'a>(&'a mut self) -> &'a mut DirstateMap<'a> {
         // SAFETY: We cast the type-erased pointer back to the same type it had
         // in `new`, except with a different lifetime parameter. This time we
diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs
--- a/rust/hg-cpython/src/dirstate/dispatch.rs
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs
@@ -124,12 +124,8 @@
         self.get_mut().pack_v1(parents, now)
     }
 
-    fn pack_v2(
-        &mut self,
-        parents: DirstateParents,
-        now: Timestamp,
-    ) -> Result<Vec<u8>, DirstateError> {
-        self.get_mut().pack_v2(parents, now)
+    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+        self.get_mut().pack_v2(now)
     }
 
     fn status<'a>(
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -57,17 +57,15 @@
 
     /// Returns a `(dirstate_map, parents)` tuple
     @staticmethod
-    def new(
+    def new_v1(
         use_dirstate_tree: bool,
-        use_dirstate_v2: bool,
         on_disk: PyBytes,
     ) -> PyResult<PyObject> {
         let dirstate_error = |e: DirstateError| {
             PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
         };
-        let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 {
-            let (map, parents) =
-                OwningDirstateMap::new(py, on_disk, use_dirstate_v2)
+        let (inner, parents) = if use_dirstate_tree {
+            let (map, parents) = OwningDirstateMap::new_v1(py, on_disk)
                 .map_err(dirstate_error)?;
             (Box::new(map) as _, parents)
         } else {
@@ -81,6 +79,20 @@
         Ok((map, parents).to_py_object(py).into_object())
     }
 
+    /// Returns a DirstateMap
+    @staticmethod
+    def new_v2(
+        on_disk: PyBytes,
+    ) -> PyResult<PyObject> {
+        let dirstate_error = |e: DirstateError| {
+            PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
+        };
+        let inner = OwningDirstateMap::new_v2(py, on_disk)
+                .map_err(dirstate_error)?;
+        let map = Self::create_instance(py, Box::new(inner))?;
+        Ok(map.into_object())
+    }
+
     def clear(&self) -> PyResult<PyObject> {
         self.inner(py).borrow_mut().clear();
         Ok(py.None())
@@ -304,25 +316,37 @@
             .to_py_object(py))
     }
 
-    def write(
+    def write_v1(
         &self,
-        use_dirstate_v2: bool,
         p1: PyObject,
         p2: PyObject,
         now: PyObject
     ) -> PyResult<PyBytes> {
         let now = Timestamp(now.extract(py)?);
+
+        let mut inner = self.inner(py).borrow_mut();
         let parents = DirstateParents {
             p1: extract_node_id(py, &p1)?,
             p2: extract_node_id(py, &p2)?,
         };
+        let result = inner.pack_v1(parents, now);
+        match result {
+            Ok(packed) => Ok(PyBytes::new(py, &packed)),
+            Err(_) => Err(PyErr::new::<exc::OSError, _>(
+                py,
+                "Dirstate error".to_string(),
+            )),
+        }
+    }
+
+    def write_v2(
+        &self,
+        now: PyObject
+    ) -> PyResult<PyBytes> {
+        let now = Timestamp(now.extract(py)?);
 
         let mut inner = self.inner(py).borrow_mut();
-        let result = if use_dirstate_v2 {
-            inner.pack_v2(parents, now)
-        } else {
-            inner.pack_v1(parents, now)
-        };
+        let result = inner.pack_v2(now);
         match result {
             Ok(packed) => Ok(PyBytes::new(py, &packed)),
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -241,11 +241,12 @@
             return Ok(crate::dirstate::DirstateParents::NULL);
         }
         let parents = if self.has_dirstate_v2() {
-            crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)?
+            crate::dirstate_tree::on_disk::read_docket(&dirstate)?.parents()
         } else {
             crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
+                .clone()
         };
-        Ok(parents.clone())
+        Ok(parents)
     }
 }
 
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -19,6 +19,7 @@
 use crate::EntryState;
 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
 use bytes_cast::BytesCast;
+use format_bytes::format_bytes;
 use std::borrow::Cow;
 use std::convert::TryFrom;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
@@ -28,18 +29,34 @@
 /// `.hg/requires` already governs which format should be used.
 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
 
+/// Keep space for 256-bit hashes
+const STORED_NODE_ID_BYTES: usize = 32;
+
+/// … even though only 160 bits are used for now, with SHA-1
+const USED_NODE_ID_BYTES: usize = 20;
+
 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
 
+// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
+#[derive(BytesCast)]
+#[repr(C)]
+struct DocketHeader {
+    marker: [u8; V2_FORMAT_MARKER.len()],
+    parent_1: [u8; STORED_NODE_ID_BYTES],
+    parent_2: [u8; STORED_NODE_ID_BYTES],
+    data_size: Size,
+    uuid_size: u8,
+}
+
+pub struct Docket<'on_disk> {
+    header: &'on_disk DocketHeader,
+    uuid: &'on_disk [u8],
+}
+
 #[derive(BytesCast)]
 #[repr(C)]
 struct Header {
-    marker: [u8; V2_FORMAT_MARKER.len()],
-
-    /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
-    /// `parents` field being at this offset, immediately after `marker`.
-    parents: DirstateParents,
-
     root: ChildNodes,
     nodes_with_entry_count: Size,
     nodes_with_copy_source_count: Size,
@@ -172,7 +189,8 @@
 
 /// Make sure that size-affecting changes are made knowingly
 fn _static_assert_size_of() {
-    let _ = std::mem::transmute::<Header, [u8; 88]>;
+    let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
+    let _ = std::mem::transmute::<Header, [u8; 36]>;
     let _ = std::mem::transmute::<Node, [u8; 49]>;
 }
 
@@ -194,11 +212,31 @@
     }
 }
 
-fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
-    let (header, _) =
-        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
-    if header.marker == *V2_FORMAT_MARKER {
-        Ok(header)
+impl<'on_disk> Docket<'on_disk> {
+    pub fn parents(&self) -> DirstateParents {
+        use crate::Node;
+        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
+            .unwrap()
+            .clone();
+        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
+            .unwrap()
+            .clone();
+        DirstateParents { p1, p2 }
+    }
+
+    pub fn data_filename(&self) -> String {
+        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
+    }
+}
+
+pub fn read_docket(
+    on_disk: &[u8],
+) -> Result<Docket<'_>, DirstateV2ParseError> {
+    let (header, uuid) =
+        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+    let uuid_size = header.uuid_size as usize;
+    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
+        Ok(Docket { header, uuid })
     } else {
         Err(DirstateV2ParseError)
     }
@@ -206,14 +244,12 @@
 
 pub(super) fn read<'on_disk>(
     on_disk: &'on_disk [u8],
-) -> Result<
-    (DirstateMap<'on_disk>, Option<DirstateParents>),
-    DirstateV2ParseError,
-> {
+) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
     if on_disk.is_empty() {
-        return Ok((DirstateMap::empty(on_disk), None));
+        return Ok(DirstateMap::empty(on_disk));
     }
-    let header = read_header(on_disk)?;
+    let (header, _) =
+        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
     let dirstate_map = DirstateMap {
         on_disk,
         root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
@@ -226,8 +262,7 @@
             .get(),
         ignore_patterns_hash: header.ignore_patterns_hash,
     };
-    let parents = Some(header.parents.clone());
-    Ok((dirstate_map, parents))
+    Ok(dirstate_map)
 }
 
 impl Node {
@@ -447,17 +482,12 @@
         .ok_or_else(|| DirstateV2ParseError)
 }
 
-pub(crate) fn parse_dirstate_parents(
-    on_disk: &[u8],
-) -> Result<&DirstateParents, HgError> {
-    Ok(&read_header(on_disk)?.parents)
-}
-
 pub(crate) fn for_each_tracked_path<'on_disk>(
     on_disk: &'on_disk [u8],
     mut f: impl FnMut(&'on_disk HgPath),
 ) -> Result<(), DirstateV2ParseError> {
-    let header = read_header(on_disk)?;
+    let (header, _) =
+        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
     fn recur<'on_disk>(
         on_disk: &'on_disk [u8],
         nodes: Slice,
@@ -478,7 +508,6 @@
 
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
-    parents: DirstateParents,
 ) -> Result<Vec<u8>, DirstateError> {
     let header_len = std::mem::size_of::<Header>();
 
@@ -497,8 +526,6 @@
         write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
 
     let header = Header {
-        marker: *V2_FORMAT_MARKER,
-        parents: parents,
         root,
         nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
         nodes_with_copy_source_count: dirstate_map
diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs
@@ -183,11 +183,7 @@
     /// format.
     ///
     /// Note: this is only supported by the tree dirstate map.
-    fn pack_v2(
-        &mut self,
-        parents: DirstateParents,
-        now: Timestamp,
-    ) -> Result<Vec<u8>, DirstateError>;
+    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
 
     /// Run the status algorithm.
     ///
@@ -387,11 +383,7 @@
         self.pack(parents, now)
     }
 
-    fn pack_v2(
-        &mut self,
-        _parents: DirstateParents,
-        _now: Timestamp,
-    ) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
         panic!(
             "should have used dirstate_tree::DirstateMap to use the v2 format"
         )
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -410,9 +410,7 @@
     }
 
     #[timed]
-    pub fn new_v2(
-        on_disk: &'on_disk [u8],
-    ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
+    pub fn new_v2(on_disk: &'on_disk [u8]) -> Result<Self, DirstateError> {
         Ok(on_disk::read(on_disk)?)
     }
 
@@ -1039,11 +1037,7 @@
     }
 
     #[timed]
-    fn pack_v2(
-        &mut self,
-        parents: DirstateParents,
-        now: Timestamp,
-    ) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
         // TODO: how do we want to handle this in 2038?
         let now: i32 = now.0.try_into().expect("time overflow");
         let mut paths = Vec::new();
@@ -1062,7 +1056,7 @@
 
         self.clear_known_ambiguous_mtimes(&paths)?;
 
-        on_disk::write(self, parents)
+        on_disk::write(self)
     }
 
     fn status<'a>(
diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py
--- a/mercurial/upgrade_utils/engine.py
+++ b/mercurial/upgrade_utils/engine.py
@@ -627,6 +627,7 @@
     srcrepo.dirstate._use_dirstate_v2 = new == b'v2'
     srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2
     srcrepo.dirstate._dirty = True
+    srcrepo.vfs.unlink(b'dirstate')
     srcrepo.dirstate.write(None)
 
     scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
diff --git a/mercurial/dirstateutils/docket.py b/mercurial/dirstateutils/docket.py
new file mode 100644
--- /dev/null
+++ b/mercurial/dirstateutils/docket.py
@@ -0,0 +1,60 @@
+# docket.py - docket file for dirstate-v2
+#
+# Copyright Mercurial Contributors
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import struct
+
+from ..revlogutils import docket as docket_mod
+
+
+V2_FORMAT_MARKER = b"dirstate-v2\n"
+
+# * 12 bytes: format marker
+# * 32 bytes: node ID of the working directory’s first parent
+# * 32 bytes: node ID of the working directory’s second parent
+# * 4 bytes: big-endian used size of the data file
+# * 1 byte: length of the data file’s UUID
+# * variable: data file’s UUID
+#
+# Node IDs are null-padded if shorter than 32 bytes.
+# A data file shorter than the specified used size is corrupted (truncated)
+HEADER = struct.Struct(">{}s32s32sLB".format(len(V2_FORMAT_MARKER)))
+
+
+class DirstateDocket(object):
+    data_filename_pattern = b'dirstate.%s.d'
+
+    def __init__(self, parents, data_size, uuid):
+        self.parents = parents
+        self.data_size = data_size
+        self.uuid = uuid
+
+    @classmethod
+    def with_new_uuid(cls, parents, data):
+        return cls(parents, data, docket_mod.make_uid())
+
+    @classmethod
+    def parse(cls, data, nodeconstants):
+        if not data:
+            parents = (nodeconstants.nullid, nodeconstants.nullid)
+            return cls(parents, 0, None)
+        marker, p1, p2, data_size, uuid_size = HEADER.unpack_from(data)
+        if marker != V2_FORMAT_MARKER:
+            raise ValueError("expected dirstate-v2 marker")
+        uuid = data[HEADER.size : HEADER.size + uuid_size]
+        p1 = p1[:nodeconstants.nodelen]
+        p2 = p2[:nodeconstants.nodelen]
+        return cls((p1, p2), data_size, uuid)
+
+    def serialize(self):
+        p1, p2 = self.parents
+        header = HEADER.pack(V2_FORMAT_MARKER, p1, p2, self.data_size, len(self.uuid))
+        return header + self.uuid
+
+    def data_filename(self):
+        return self.data_filename_pattern % self.uuid
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -18,6 +18,10 @@
     util,
 )
 
+from .dirstateutils import (
+    docket as docketmod,
+)
+
 parsers = policy.importmod('parsers')
 rustmod = policy.importrust('dirstate')
 
@@ -468,6 +472,7 @@
             self._nodelen = 20  # Also update Rust code when changing this!
             self._parents = None
             self._dirtyparents = False
+            self._docket = None
 
             # for consistent view between _pl() and _read() invocations
             self._pendingmode = None
@@ -567,6 +572,16 @@
             self._pendingmode = mode
             return fp
 
+        def _readdirstatefile(self, size=-1):
+            try:
+                with self._opendirstatefile() as fp:
+                    return fp.read(size)
+            except IOError as err:
+                if err.errno != errno.ENOENT:
+                    raise
+                # File doesn't exist, so the current state is empty
+                return b''
+
         def setparents(self, p1, p2):
             self._parents = (p1, p2)
             self._dirtyparents = True
@@ -574,39 +589,40 @@
         def parents(self):
             if not self._parents:
                 if self._use_dirstate_v2:
-                    offset = len(rustmod.V2_FORMAT_MARKER)
+                    self._parents = self.docket.parents
                 else:
-                    offset = 0
-                read_len = offset + self._nodelen * 2
-                try:
-                    fp = self._opendirstatefile()
-                    st = fp.read(read_len)
-                    fp.close()
-                except IOError as err:
-                    if err.errno != errno.ENOENT:
-                        raise
-                    # File doesn't exist, so the current state is empty
-                    st = b''
-
-                l = len(st)
-                if l == read_len:
-                    st = st[offset:]
-                    self._parents = (
-                        st[: self._nodelen],
-                        st[self._nodelen : 2 * self._nodelen],
-                    )
-                elif l == 0:
-                    self._parents = (
-                        self._nodeconstants.nullid,
-                        self._nodeconstants.nullid,
-                    )
-                else:
-                    raise error.Abort(
-                        _(b'working directory state appears damaged!')
-                    )
+                    read_len = self._nodelen * 2
+                    st = self._readdirstatefile(read_len)
+                    l = len(st)
+                    if l == read_len:
+                        self._parents = (
+                            st[: self._nodelen],
+                            st[self._nodelen : 2 * self._nodelen],
+                        )
+                    elif l == 0:
+                        self._parents = (
+                            self._nodeconstants.nullid,
+                            self._nodeconstants.nullid,
+                        )
+                    else:
+                        raise error.Abort(
+                            _(b'working directory state appears damaged!')
+                        )
 
             return self._parents
 
+        @property
+        def docket(self):
+            if not self._docket:
+                if not self._use_dirstate_v2:
+                    raise error.ProgrammingError(
+                        b'dirstate only has a docket in v2 format'
+                    )
+                self._docket = docketmod.DirstateDocket.parse(
+                    self._readdirstatefile(), self._nodeconstants
+                )
+            return self._docket
+
         @propertycache
         def _rustmap(self):
             """
@@ -617,20 +633,19 @@
                 self._opener.join(self._filename)
             )
 
-            try:
-                fp = self._opendirstatefile()
-                try:
-                    st = fp.read()
-                finally:
-                    fp.close()
-            except IOError as err:
-                if err.errno != errno.ENOENT:
-                    raise
-                st = b''
-
-            self._rustmap, parents = rustmod.DirstateMap.new(
-                self._use_dirstate_tree, self._use_dirstate_v2, st
-            )
+            st = self._readdirstatefile()
+            if self._use_dirstate_v2:
+                if self.docket.uuid:
+                    # TODO: use mmap when possible
+                    data = self._opener.read(self.docket.data_filename())
+                else:
+                    data = b''
+                self._rustmap = rustmod.DirstateMap.new_v2(data)
+                parents = self.docket.parents
+            else:
+                self._rustmap, parents = rustmod.DirstateMap.new_v1(
+                    self._use_dirstate_tree, st
+                )
 
             if parents and not self._dirtyparents:
                 self.setparents(*parents)
@@ -640,13 +655,31 @@
             self.get = self._rustmap.get
             return self._rustmap
 
-        def write(self, st, now):
-            parents = self.parents()
-            packed = self._rustmap.write(
-                self._use_dirstate_v2, parents[0], parents[1], now
-            )
-            st.write(packed)
-            st.close()
+        def write(self, tr, st, now):
+            if self._use_dirstate_v2:
+                packed = self._rustmap.write_v2(now)
+                old_docket = self.docket
+                new_docket = docketmod.DirstateDocket.with_new_uuid(
+                    self.parents(), len(packed)
+                )
+                self._opener.write(new_docket.data_filename(), packed)
+                # Write the new docket after the new data file has been
+                # written.
+                # TODO: Doesn’t opening the docket file for writing truncate
+                # it to zero size? Should we also do that after writing the
+                # data file?
+                st.write(new_docket.serialize())
+                st.close()
+                # Remove the old data file after the new docket pointing to
+                # the new data file was written.
+                if old_docket.uuid:
+                    self._opener.unlink(old_docket.data_filename())
+                self._docket = new_docket
+            else:
+                p1, p2 = self.parents()
+                packed = self._rustmap.write_v1(p1, p2, now)
+                st.write(packed)
+                st.close()
             self._dirtyparents = False
 
         @propertycache
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -717,13 +717,13 @@
             tr.addfilegenerator(
                 b'dirstate',
                 (self._filename,),
-                self._writedirstate,
+                lambda f: self._writedirstate(tr, f),
                 location=b'plain',
             )
             return
 
         st = self._opener(filename, b"w", atomictemp=True, checkambig=True)
-        self._writedirstate(st)
+        self._writedirstate(tr, st)
 
     def addparentchangecallback(self, category, callback):
         """add a callback to be called when the wd parents are changed
@@ -736,7 +736,7 @@
         """
         self._plchangecallbacks[category] = callback
 
-    def _writedirstate(self, st):
+    def _writedirstate(self, tr, st):
         # notify callbacks about parents change
         if self._origpl is not None and self._origpl != self._pl:
             for c, callback in sorted(
@@ -766,7 +766,7 @@
                     now = end  # trust our estimate that the end is near now
                     break
 
-        self._map.write(st, now)
+        self._map.write(tr, st, now)
         self._lastnormaltime = 0
         self._dirty = False
 
@@ -1391,6 +1391,7 @@
         # output file will be used to create backup of dirstate at this point.
         if self._dirty or not self._opener.exists(filename):
             self._writedirstate(
+                tr,
                 self._opener(filename, b"w", atomictemp=True, checkambig=True)
             )
 
@@ -1401,7 +1402,7 @@
             tr.addfilegenerator(
                 b'dirstate',
                 (self._filename,),
-                self._writedirstate,
+                lambda f: self._writedirstate(tr, f),
                 location=b'plain',
             )
 
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -7,6 +7,7 @@
 
 from __future__ import absolute_import
 
+import binascii
 import codecs
 import collections
 import contextlib
@@ -987,6 +988,24 @@
 
 
 @command(
+    b'debugdirstateignorepatternshash',
+    [],
+    _(b''),
+)
+def debugdirstateignorepatternshash(ui, repo, **opts):
+    """show the hash of ignore patterns stored in dirstate if v2,
+    or nothing for dirstate-v1
+    """
+    if repo.dirstate._use_dirstate_v2:
+        hash_offset = 16  # Four 32-bit integers before this field
+        hash_len = 20  # 160 bits for SHA-1
+        data_filename = repo.dirstate._map.docket.data_filename()
+        with repo.vfs(data_filename) as f:
+            hash = f.read(hash_offset + hash_len)[-hash_len:]
+        print(binascii.hexlify(hash).decode())
+
+
+@command(
     b'debugdiscovery',
     [
         (b'', b'old', None, _(b'use old-style discovery')),



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list