D11088: dirstate-v2: Introduce a docket file
SimonSapin
phabricator at mercurial-scm.org
Mon Jul 12 15:54:46 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
.hg/dirstate now only contains some metadata to point to a separate data file
named .hg/dirstate.{}.d with a random hexadecimal identifier. For now every
update creates a new data file and removes the old one, but later we’ll
(usually) append to an existing file.
Separating into two files allows doing the "write to a temporary file then
atomically rename into destination" dance with only a small docket file,
without always rewriting a lot of data.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D11088
AFFECTED FILES
mercurial/debugcommands.py
mercurial/dirstate.py
mercurial/dirstatemap.py
mercurial/dirstateutils/docket.py
mercurial/upgrade_utils/engine.py
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dispatch.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/repo.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/dispatch.rs
rust/hg-cpython/src/dirstate/owning.rs
rust/rhg/src/commands/status.rs
rust/rhg/src/error.rs
tests/test-hgignore.t
CHANGE DETAILS
diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t
--- a/tests/test-hgignore.t
+++ b/tests/test-hgignore.t
@@ -405,20 +405,19 @@
#if dirstate-v2
-Check the hash of ignore patterns written in the dirstate at offset
-12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
+Check the hash of ignore patterns written in the dirstate
$ hg status > /dev/null
$ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
- >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
+ $ hg debugdirstateignorepatternshash
6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
$ echo rel > .hg/testhgignorerel
$ hg status > /dev/null
$ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
- >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
+ $ hg debugdirstateignorepatternshash
dea19cc7119213f24b6b582a4bae7b0cb063e34e
#endif
diff --git a/rust/rhg/src/error.rs b/rust/rhg/src/error.rs
--- a/rust/rhg/src/error.rs
+++ b/rust/rhg/src/error.rs
@@ -3,6 +3,7 @@
use crate::NoRepoInCwdError;
use format_bytes::format_bytes;
use hg::config::{ConfigError, ConfigParseError, ConfigValueParseError};
+use hg::dirstate_tree::on_disk::DirstateV2ParseError;
use hg::errors::HgError;
use hg::exit_codes;
use hg::repo::RepoError;
@@ -199,3 +200,9 @@
}
}
}
+
+impl From<DirstateV2ParseError> for CommandError {
+ fn from(error: DirstateV2ParseError) -> Self {
+ HgError::from(error).into()
+ }
+}
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -10,6 +10,7 @@
use clap::{Arg, SubCommand};
use hg;
use hg::dirstate_tree::dirstate_map::DirstateMap;
+use hg::dirstate_tree::on_disk;
use hg::errors::HgResultExt;
use hg::errors::IoResultExt;
use hg::matchers::AlwaysMatcher;
@@ -165,17 +166,33 @@
};
let repo = invocation.repo?;
- let dirstate_data =
- repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
- let dirstate_data = match &dirstate_data {
- Some(mmap) => &**mmap,
- None => b"",
- };
+ let dirstate_data_mmap;
let (mut dmap, parents) = if repo.has_dirstate_v2() {
- DirstateMap::new_v2(dirstate_data)?
+ let parents;
+ let dirstate_data;
+ if let Some(docket_data) =
+ repo.hg_vfs().read("dirstate").io_not_found_as_none()?
+ {
+ let docket = on_disk::read_docket(&docket_data)?;
+ parents = Some(docket.parents());
+ dirstate_data_mmap = repo
+ .hg_vfs()
+ .mmap_open(docket.data_filename())
+ .io_not_found_as_none()?;
+ dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
+ } else {
+ parents = None;
+ dirstate_data = b"";
+ }
+ let dmap = DirstateMap::new_v2(dirstate_data)?;
+ (dmap, parents)
} else {
+ dirstate_data_mmap =
+ repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
+ let dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
DirstateMap::new_v1(dirstate_data)?
};
+
let options = StatusOptions {
// TODO should be provided by the dirstate parsing and
// hence be stored on dmap. Using a value that assumes we aren't
diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs
--- a/rust/hg-cpython/src/dirstate/owning.rs
+++ b/rust/hg-cpython/src/dirstate/owning.rs
@@ -28,17 +28,12 @@
}
impl OwningDirstateMap {
- pub fn new(
+ pub fn new_v1(
py: Python,
on_disk: PyBytes,
- use_dirstate_v2: bool,
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
let bytes: &'_ [u8] = on_disk.data(py);
- let (map, parents) = if use_dirstate_v2 {
- DirstateMap::new_v2(bytes)?
- } else {
- DirstateMap::new_v1(bytes)?
- };
+ let (map, parents) = DirstateMap::new_v1(bytes)?;
// Like in `bytes` above, this `'_` lifetime parameter borrows from
// the bytes buffer owned by `on_disk`.
@@ -50,6 +45,23 @@
Ok((Self { on_disk, ptr }, parents))
}
+ pub fn new_v2(
+ py: Python,
+ on_disk: PyBytes,
+ ) -> Result<Self, DirstateError> {
+ let bytes: &'_ [u8] = on_disk.data(py);
+ let map = DirstateMap::new_v2(bytes)?;
+
+ // Like in `bytes` above, this `'_` lifetime parameter borrows from
+ // the bytes buffer owned by `on_disk`.
+ let ptr: *mut DirstateMap<'_> = Box::into_raw(Box::new(map));
+
+ // Erase the pointed type entirely in order to erase the lifetime.
+ let ptr: *mut () = ptr.cast();
+
+ Ok(Self { on_disk, ptr })
+ }
+
pub fn get_mut<'a>(&'a mut self) -> &'a mut DirstateMap<'a> {
// SAFETY: We cast the type-erased pointer back to the same type it had
// in `new`, except with a different lifetime parameter. This time we
diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs
--- a/rust/hg-cpython/src/dirstate/dispatch.rs
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs
@@ -124,12 +124,8 @@
self.get_mut().pack_v1(parents, now)
}
- fn pack_v2(
- &mut self,
- parents: DirstateParents,
- now: Timestamp,
- ) -> Result<Vec<u8>, DirstateError> {
- self.get_mut().pack_v2(parents, now)
+ fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+ self.get_mut().pack_v2(now)
}
fn status<'a>(
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -57,17 +57,15 @@
/// Returns a `(dirstate_map, parents)` tuple
@staticmethod
- def new(
+ def new_v1(
use_dirstate_tree: bool,
- use_dirstate_v2: bool,
on_disk: PyBytes,
) -> PyResult<PyObject> {
let dirstate_error = |e: DirstateError| {
PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
};
- let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 {
- let (map, parents) =
- OwningDirstateMap::new(py, on_disk, use_dirstate_v2)
+ let (inner, parents) = if use_dirstate_tree {
+ let (map, parents) = OwningDirstateMap::new_v1(py, on_disk)
.map_err(dirstate_error)?;
(Box::new(map) as _, parents)
} else {
@@ -81,6 +79,20 @@
Ok((map, parents).to_py_object(py).into_object())
}
+ /// Returns a DirstateMap
+ @staticmethod
+ def new_v2(
+ on_disk: PyBytes,
+ ) -> PyResult<PyObject> {
+ let dirstate_error = |e: DirstateError| {
+ PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
+ };
+ let inner = OwningDirstateMap::new_v2(py, on_disk)
+ .map_err(dirstate_error)?;
+ let map = Self::create_instance(py, Box::new(inner))?;
+ Ok(map.into_object())
+ }
+
def clear(&self) -> PyResult<PyObject> {
self.inner(py).borrow_mut().clear();
Ok(py.None())
@@ -304,25 +316,37 @@
.to_py_object(py))
}
- def write(
+ def write_v1(
&self,
- use_dirstate_v2: bool,
p1: PyObject,
p2: PyObject,
now: PyObject
) -> PyResult<PyBytes> {
let now = Timestamp(now.extract(py)?);
+
+ let mut inner = self.inner(py).borrow_mut();
let parents = DirstateParents {
p1: extract_node_id(py, &p1)?,
p2: extract_node_id(py, &p2)?,
};
+ let result = inner.pack_v1(parents, now);
+ match result {
+ Ok(packed) => Ok(PyBytes::new(py, &packed)),
+ Err(_) => Err(PyErr::new::<exc::OSError, _>(
+ py,
+ "Dirstate error".to_string(),
+ )),
+ }
+ }
+
+ def write_v2(
+ &self,
+ now: PyObject
+ ) -> PyResult<PyBytes> {
+ let now = Timestamp(now.extract(py)?);
let mut inner = self.inner(py).borrow_mut();
- let result = if use_dirstate_v2 {
- inner.pack_v2(parents, now)
- } else {
- inner.pack_v1(parents, now)
- };
+ let result = inner.pack_v2(now);
match result {
Ok(packed) => Ok(PyBytes::new(py, &packed)),
Err(_) => Err(PyErr::new::<exc::OSError, _>(
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
--- a/rust/hg-core/src/repo.rs
+++ b/rust/hg-core/src/repo.rs
@@ -241,11 +241,12 @@
return Ok(crate::dirstate::DirstateParents::NULL);
}
let parents = if self.has_dirstate_v2() {
- crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)?
+ crate::dirstate_tree::on_disk::read_docket(&dirstate)?.parents()
} else {
crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
+ .clone()
};
- Ok(parents.clone())
+ Ok(parents)
}
}
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -19,6 +19,7 @@
use crate::EntryState;
use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
use bytes_cast::BytesCast;
+use format_bytes::format_bytes;
use std::borrow::Cow;
use std::convert::TryFrom;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
@@ -28,18 +29,34 @@
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
+/// Keep space for 256-bit hashes
+const STORED_NODE_ID_BYTES: usize = 32;
+
+/// … even though only 160 bits are used for now, with SHA-1
+const USED_NODE_ID_BYTES: usize = 20;
+
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
+// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
+#[derive(BytesCast)]
+#[repr(C)]
+struct DocketHeader {
+ marker: [u8; V2_FORMAT_MARKER.len()],
+ parent_1: [u8; STORED_NODE_ID_BYTES],
+ parent_2: [u8; STORED_NODE_ID_BYTES],
+ data_size: Size,
+ uuid_size: u8,
+}
+
+pub struct Docket<'on_disk> {
+ header: &'on_disk DocketHeader,
+ uuid: &'on_disk [u8],
+}
+
#[derive(BytesCast)]
#[repr(C)]
struct Header {
- marker: [u8; V2_FORMAT_MARKER.len()],
-
- /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
- /// `parents` field being at this offset, immediately after `marker`.
- parents: DirstateParents,
-
root: ChildNodes,
nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
@@ -172,7 +189,8 @@
/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
- let _ = std::mem::transmute::<Header, [u8; 88]>;
+ let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
+ let _ = std::mem::transmute::<Header, [u8; 36]>;
let _ = std::mem::transmute::<Node, [u8; 49]>;
}
@@ -194,11 +212,31 @@
}
}
-fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
- let (header, _) =
- Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
- if header.marker == *V2_FORMAT_MARKER {
- Ok(header)
+impl<'on_disk> Docket<'on_disk> {
+ pub fn parents(&self) -> DirstateParents {
+ use crate::Node;
+ let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
+ .unwrap()
+ .clone();
+ let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
+ .unwrap()
+ .clone();
+ DirstateParents { p1, p2 }
+ }
+
+ pub fn data_filename(&self) -> String {
+ String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
+ }
+}
+
+pub fn read_docket(
+ on_disk: &[u8],
+) -> Result<Docket<'_>, DirstateV2ParseError> {
+ let (header, uuid) =
+ DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+ let uuid_size = header.uuid_size as usize;
+ if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
+ Ok(Docket { header, uuid })
} else {
Err(DirstateV2ParseError)
}
@@ -206,14 +244,12 @@
pub(super) fn read<'on_disk>(
on_disk: &'on_disk [u8],
-) -> Result<
- (DirstateMap<'on_disk>, Option<DirstateParents>),
- DirstateV2ParseError,
-> {
+) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
if on_disk.is_empty() {
- return Ok((DirstateMap::empty(on_disk), None));
+ return Ok(DirstateMap::empty(on_disk));
}
- let header = read_header(on_disk)?;
+ let (header, _) =
+ Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
let dirstate_map = DirstateMap {
on_disk,
root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
@@ -226,8 +262,7 @@
.get(),
ignore_patterns_hash: header.ignore_patterns_hash,
};
- let parents = Some(header.parents.clone());
- Ok((dirstate_map, parents))
+ Ok(dirstate_map)
}
impl Node {
@@ -447,17 +482,12 @@
.ok_or_else(|| DirstateV2ParseError)
}
-pub(crate) fn parse_dirstate_parents(
- on_disk: &[u8],
-) -> Result<&DirstateParents, HgError> {
- Ok(&read_header(on_disk)?.parents)
-}
-
pub(crate) fn for_each_tracked_path<'on_disk>(
on_disk: &'on_disk [u8],
mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
- let header = read_header(on_disk)?;
+ let (header, _) =
+ Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
fn recur<'on_disk>(
on_disk: &'on_disk [u8],
nodes: Slice,
@@ -478,7 +508,6 @@
pub(super) fn write(
dirstate_map: &mut DirstateMap,
- parents: DirstateParents,
) -> Result<Vec<u8>, DirstateError> {
let header_len = std::mem::size_of::<Header>();
@@ -497,8 +526,6 @@
write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
let header = Header {
- marker: *V2_FORMAT_MARKER,
- parents: parents,
root,
nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
nodes_with_copy_source_count: dirstate_map
diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs
@@ -183,11 +183,7 @@
/// format.
///
/// Note: this is only supported by the tree dirstate map.
- fn pack_v2(
- &mut self,
- parents: DirstateParents,
- now: Timestamp,
- ) -> Result<Vec<u8>, DirstateError>;
+ fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
/// Run the status algorithm.
///
@@ -387,11 +383,7 @@
self.pack(parents, now)
}
- fn pack_v2(
- &mut self,
- _parents: DirstateParents,
- _now: Timestamp,
- ) -> Result<Vec<u8>, DirstateError> {
+ fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
panic!(
"should have used dirstate_tree::DirstateMap to use the v2 format"
)
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -410,9 +410,7 @@
}
#[timed]
- pub fn new_v2(
- on_disk: &'on_disk [u8],
- ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
+ pub fn new_v2(on_disk: &'on_disk [u8]) -> Result<Self, DirstateError> {
Ok(on_disk::read(on_disk)?)
}
@@ -1039,11 +1037,7 @@
}
#[timed]
- fn pack_v2(
- &mut self,
- parents: DirstateParents,
- now: Timestamp,
- ) -> Result<Vec<u8>, DirstateError> {
+ fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
// TODO: how do we want to handle this in 2038?
let now: i32 = now.0.try_into().expect("time overflow");
let mut paths = Vec::new();
@@ -1062,7 +1056,7 @@
self.clear_known_ambiguous_mtimes(&paths)?;
- on_disk::write(self, parents)
+ on_disk::write(self)
}
fn status<'a>(
diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py
--- a/mercurial/upgrade_utils/engine.py
+++ b/mercurial/upgrade_utils/engine.py
@@ -627,6 +627,7 @@
srcrepo.dirstate._use_dirstate_v2 = new == b'v2'
srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2
srcrepo.dirstate._dirty = True
+ srcrepo.vfs.unlink(b'dirstate')
srcrepo.dirstate.write(None)
scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
diff --git a/mercurial/dirstateutils/docket.py b/mercurial/dirstateutils/docket.py
new file mode 100644
--- /dev/null
+++ b/mercurial/dirstateutils/docket.py
@@ -0,0 +1,60 @@
+# dirstatedocket.py - docket file for dirstate-v2
+#
+# Copyright Mercurial Contributors
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import struct
+
+from ..revlogutils import docket as docket_mod
+
+
+V2_FORMAT_MARKER = b"dirstate-v2\n"
+
+# * 12 bytes: format marker
+# * 32 bytes: node ID of the working directory’s first parent
+# * 32 bytes: node ID of the working directory’s second parent
+# * 4 bytes: big-endian used size of the data file
+# * 1 byte: length of the data file’s UUID
+# * variable: data file’s UUID
+#
+# Node IDs are null-padded if shorter than 32 bytes.
+# A data file shorter than the specified used size is corrupted (truncated)
+HEADER = struct.Struct(">{}s32s32sLB".format(len(V2_FORMAT_MARKER)))
+
+
+class DirstateDocket(object):
+ data_filename_pattern = b'dirstate.%s.d'
+
+ def __init__(self, parents, data_size, uuid):
+ self.parents = parents
+ self.data_size = data_size
+ self.uuid = uuid
+
+ @classmethod
+ def with_new_uuid(cls, parents, data):
+ return cls(parents, data, docket_mod.make_uid())
+
+ @classmethod
+ def parse(cls, data, nodeconstants):
+ if not data:
+ parents = (nodeconstants.nullid, nodeconstants.nullid)
+ return cls(parents, 0, None)
+ marker, p1, p2, data_size, uuid_size = HEADER.unpack_from(data)
+ if marker != V2_FORMAT_MARKER:
+ raise ValueError("expected dirstate-v2 marker")
+ uuid = data[HEADER.size : HEADER.size + uuid_size]
+ p1 = p1[:nodeconstants.nodelen]
+ p2 = p2[:nodeconstants.nodelen]
+ return cls((p1, p2), data_size, uuid)
+
+ def serialize(self):
+ p1, p2 = self.parents
+ header = HEADER.pack(V2_FORMAT_MARKER, p1, p2, self.data_size, len(self.uuid))
+ return header + self.uuid
+
+ def data_filename(self):
+ return self.data_filename_pattern % self.uuid
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -18,6 +18,10 @@
util,
)
+from .dirstateutils import (
+ docket as docketmod,
+)
+
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')
@@ -468,6 +472,7 @@
self._nodelen = 20 # Also update Rust code when changing this!
self._parents = None
self._dirtyparents = False
+ self._docket = None
# for consistent view between _pl() and _read() invocations
self._pendingmode = None
@@ -567,6 +572,16 @@
self._pendingmode = mode
return fp
+ def _readdirstatefile(self, size=-1):
+ try:
+ with self._opendirstatefile() as fp:
+ return fp.read(size)
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ # File doesn't exist, so the current state is empty
+ return b''
+
def setparents(self, p1, p2):
self._parents = (p1, p2)
self._dirtyparents = True
@@ -574,39 +589,40 @@
def parents(self):
if not self._parents:
if self._use_dirstate_v2:
- offset = len(rustmod.V2_FORMAT_MARKER)
+ self._parents = self.docket.parents
else:
- offset = 0
- read_len = offset + self._nodelen * 2
- try:
- fp = self._opendirstatefile()
- st = fp.read(read_len)
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- # File doesn't exist, so the current state is empty
- st = b''
-
- l = len(st)
- if l == read_len:
- st = st[offset:]
- self._parents = (
- st[: self._nodelen],
- st[self._nodelen : 2 * self._nodelen],
- )
- elif l == 0:
- self._parents = (
- self._nodeconstants.nullid,
- self._nodeconstants.nullid,
- )
- else:
- raise error.Abort(
- _(b'working directory state appears damaged!')
- )
+ read_len = self._nodelen * 2
+ st = self._readdirstatefile(read_len)
+ l = len(st)
+ if l == read_len:
+ self._parents = (
+ st[: self._nodelen],
+ st[self._nodelen : 2 * self._nodelen],
+ )
+ elif l == 0:
+ self._parents = (
+ self._nodeconstants.nullid,
+ self._nodeconstants.nullid,
+ )
+ else:
+ raise error.Abort(
+ _(b'working directory state appears damaged!')
+ )
return self._parents
+ @property
+ def docket(self):
+ if not self._docket:
+ if not self._use_dirstate_v2:
+ raise error.ProgrammingError(
+ b'dirstate only has a docket in v2 format'
+ )
+ self._docket = docketmod.DirstateDocket.parse(
+ self._readdirstatefile(), self._nodeconstants
+ )
+ return self._docket
+
@propertycache
def _rustmap(self):
"""
@@ -617,20 +633,19 @@
self._opener.join(self._filename)
)
- try:
- fp = self._opendirstatefile()
- try:
- st = fp.read()
- finally:
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- st = b''
-
- self._rustmap, parents = rustmod.DirstateMap.new(
- self._use_dirstate_tree, self._use_dirstate_v2, st
- )
+ st = self._readdirstatefile()
+ if self._use_dirstate_v2:
+ if self.docket.uuid:
+ # TODO: use mmap when possible
+ data = self._opener.read(self.docket.data_filename())
+ else:
+ data = b''
+ self._rustmap = rustmod.DirstateMap.new_v2(data)
+ parents = self.docket.parents
+ else:
+ self._rustmap, parents = rustmod.DirstateMap.new_v1(
+ self._use_dirstate_tree, st
+ )
if parents and not self._dirtyparents:
self.setparents(*parents)
@@ -640,13 +655,31 @@
self.get = self._rustmap.get
return self._rustmap
- def write(self, st, now):
- parents = self.parents()
- packed = self._rustmap.write(
- self._use_dirstate_v2, parents[0], parents[1], now
- )
- st.write(packed)
- st.close()
+ def write(self, tr, st, now):
+ if self._use_dirstate_v2:
+ packed = self._rustmap.write_v2(now)
+ old_docket = self.docket
+ new_docket = docketmod.DirstateDocket.with_new_uuid(
+ self.parents(), len(packed)
+ )
+ self._opener.write(new_docket.data_filename(), packed)
+ # Write the new docket after the new data file has been
+ # written.
+ # TODO: Doesn’t opening the docket file for writing truncate
+ # it to zero size? Should we also do that after writing the
+ # data file?
+ st.write(new_docket.serialize())
+ st.close()
+ # Remove the old data file after the new docket pointing to
+ # the new data file was written.
+ if old_docket.uuid:
+ self._opener.unlink(old_docket.data_filename())
+ self._docket = new_docket
+ else:
+ p1, p2 = self.parents()
+ packed = self._rustmap.write_v1(p1, p2, now)
+ st.write(packed)
+ st.close()
self._dirtyparents = False
@propertycache
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -717,13 +717,13 @@
tr.addfilegenerator(
b'dirstate',
(self._filename,),
- self._writedirstate,
+ lambda f: self._writedirstate(tr, f),
location=b'plain',
)
return
st = self._opener(filename, b"w", atomictemp=True, checkambig=True)
- self._writedirstate(st)
+ self._writedirstate(tr, st)
def addparentchangecallback(self, category, callback):
"""add a callback to be called when the wd parents are changed
@@ -736,7 +736,7 @@
"""
self._plchangecallbacks[category] = callback
- def _writedirstate(self, st):
+ def _writedirstate(self, tr, st):
# notify callbacks about parents change
if self._origpl is not None and self._origpl != self._pl:
for c, callback in sorted(
@@ -766,7 +766,7 @@
now = end # trust our estimate that the end is near now
break
- self._map.write(st, now)
+ self._map.write(tr, st, now)
self._lastnormaltime = 0
self._dirty = False
@@ -1391,6 +1391,7 @@
# output file will be used to create backup of dirstate at this point.
if self._dirty or not self._opener.exists(filename):
self._writedirstate(
+ tr,
self._opener(filename, b"w", atomictemp=True, checkambig=True)
)
@@ -1401,7 +1402,7 @@
tr.addfilegenerator(
b'dirstate',
(self._filename,),
- self._writedirstate,
+ lambda f: self._writedirstate(tr, f),
location=b'plain',
)
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -7,6 +7,7 @@
from __future__ import absolute_import
+import binascii
import codecs
import collections
import contextlib
@@ -987,6 +988,24 @@
@command(
+ b'debugdirstateignorepatternshash',
+ [],
+ _(b''),
+)
+def debugdirstateignorepatternshash(ui, repo, **opts):
+ """show the hash of ignore patterns stored in dirstate if v2,
+ or nothing for dirstate-v1
+ """
+ if repo.dirstate._use_dirstate_v2:
+ hash_offset = 16 # Four 32-bit integers before this field
+ hash_len = 20 # 160 bits for SHA-1
+ data_filename = repo.dirstate._map.docket.data_filename()
+ with repo.vfs(data_filename) as f:
+ hash = f.read(hash_offset + hash_len)[-hash_len:]
+ print(binascii.hexlify(hash).decode())
+
+
@command(
b'debugdiscovery',
[
(b'', b'old', None, _(b'use old-style discovery')),
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list