[Request] [++-- ] D10825: dirstate-v2: Allow tree nodes without an entry to store a timestamp
SimonSapin
phabricator at mercurial-scm.org
Tue Jun 1 16:52:32 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
Timestamps are stored on 96 bits:
- 64 bits for the signed number of seconds since the Unix epoch
- 32 bits for the nanoseconds in the `0 <= ns < 1_000_000_000` range
For now timestamps are not used or set yet.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10825
AFFECTED FILES
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -17,10 +17,11 @@
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
-use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
+use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
use bytes_cast::BytesCast;
use std::borrow::Cow;
-use std::convert::{TryFrom, TryInto};
+use std::convert::TryFrom;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
@@ -50,22 +51,43 @@
base_name_start: Size,
copy_source: OptPathSlice,
- entry: OptEntry,
children: ChildNodes,
pub(super) tracked_descendants_count: Size,
+
+ /// Dependending on the value of `state`:
+ ///
+ /// * A null byte: `data` represents nothing
+ /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
+ /// represents a dirstate entry like in the v1 format.
+ /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
+ /// as the `Timestamp` for the mtime of a cached directory.
+ ///
+ /// TODO: document directory caching
+ state: u8,
+ data: Entry,
}
-/// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
-/// format
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
-struct OptEntry {
- state: u8,
+struct Entry {
mode: I32Be,
mtime: I32Be,
size: I32Be,
}
+/// Duration since the Unix epoch
+#[derive(BytesCast, Copy, Clone)]
+#[repr(C)]
+pub(super) struct Timestamp {
+ seconds: I64Be,
+
+ /// In `0 .. 1_000_000_000`.
+ ///
+ /// This timestamp is later or earlier than `(seconds, 0)` by this many
+ /// nanoseconds, if `seconds` is non-negative or negative, respectively.
+ nanoseconds: U32Be,
+}
+
/// Counted in bytes from the start of the file
///
/// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
@@ -216,34 +238,54 @@
})
}
- pub(super) fn has_entry(&self) -> bool {
- self.entry.state != b'\0'
+ pub(super) fn node_data(
+ &self,
+ ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
+ let entry = |state| {
+ dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
+ };
+
+ match self.state {
+ b'\0' => Ok(dirstate_map::NodeData::None),
+ b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
+ mtime: *self.data.as_timestamp(),
+ }),
+ b'n' => Ok(entry(EntryState::Normal)),
+ b'a' => Ok(entry(EntryState::Added)),
+ b'r' => Ok(entry(EntryState::Removed)),
+ b'm' => Ok(entry(EntryState::Merged)),
+ _ => Err(DirstateV2ParseError),
+ }
}
pub(super) fn state(
&self,
) -> Result<Option<EntryState>, DirstateV2ParseError> {
- Ok(if self.has_entry() {
- Some(
- self.entry
- .state
- .try_into()
- .map_err(|_| DirstateV2ParseError)?,
- )
- } else {
- None
- })
+ match self.state {
+ b'\0' | b'd' => Ok(None),
+ b'n' => Ok(Some(EntryState::Normal)),
+ b'a' => Ok(Some(EntryState::Added)),
+ b'r' => Ok(Some(EntryState::Removed)),
+ b'm' => Ok(Some(EntryState::Merged)),
+ _ => Err(DirstateV2ParseError),
+ }
+ }
+
+ fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
+ DirstateEntry {
+ state,
+ mode: self.data.mode.get(),
+ mtime: self.data.mtime.get(),
+ size: self.data.size.get(),
+ }
}
pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
- Ok(self.state()?.map(|state| DirstateEntry {
- state,
- mode: self.entry.mode.get(),
- mtime: self.entry.mtime.get(),
- size: self.entry.size.get(),
- }))
+ Ok(self
+ .state()?
+ .map(|state| self.entry_with_given_state(state)))
}
pub(super) fn children<'on_disk>(
@@ -262,12 +304,58 @@
self.children(on_disk)?,
),
copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
- entry: self.entry()?,
+ data: self.node_data()?,
tracked_descendants_count: self.tracked_descendants_count.get(),
})
}
}
+impl Entry {
+ fn from_timestamp(timestamp: Timestamp) -> Self {
+ // Safety: both types implement the `ByteCast` trait, so we could
+ // safely use `as_bytes` and `from_bytes` to do this conversion. Using
+ // `transmute` instead makes the compiler check that the two types
+ // have the same size, which eliminates the error case of
+ // `from_bytes`.
+ unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
+ }
+
+ fn as_timestamp(&self) -> &Timestamp {
+ // Safety: same as above in `from_timestamp`
+ unsafe { &*(self as *const Entry as *const Timestamp) }
+ }
+}
+
+impl From<&'_ SystemTime> for Timestamp {
+ fn from(system_time: &'_ SystemTime) -> Self {
+ let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
+ Ok(duration) => {
+ (duration.as_secs() as i64, duration.subsec_nanos())
+ }
+ Err(error) => {
+ let negative = error.duration();
+ (-(negative.as_secs() as i64), negative.subsec_nanos())
+ }
+ };
+ Timestamp {
+ seconds: secs.into(),
+ nanoseconds: nanos.into(),
+ }
+ }
+}
+
+impl From<&'_ Timestamp> for SystemTime {
+ fn from(timestamp: &'_ Timestamp) -> Self {
+ let secs = timestamp.seconds.get();
+ let nanos = timestamp.nanoseconds.get();
+ if secs >= 0 {
+ UNIX_EPOCH + Duration::new(secs as u64, nanos)
+ } else {
+ UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
+ }
+ }
+}
+
fn read_hg_path(
on_disk: &[u8],
slice: Slice,
@@ -356,33 +444,43 @@
}
};
on_disk_nodes.push(match node {
- NodeRef::InMemory(path, node) => Node {
- children,
- copy_source,
- full_path,
- base_name_start: u32::try_from(path.base_name_start())
- // Could only panic for paths over 4 GiB
- .expect("dirstate-v2 offset overflow")
- .into(),
- tracked_descendants_count: node
- .tracked_descendants_count
- .into(),
- entry: if let Some(entry) = &node.entry {
- OptEntry {
- state: entry.state.into(),
- mode: entry.mode.into(),
- mtime: entry.mtime.into(),
- size: entry.size.into(),
+ NodeRef::InMemory(path, node) => {
+ let (state, data) = match &node.data {
+ dirstate_map::NodeData::Entry(entry) => (
+ entry.state.into(),
+ Entry {
+ mode: entry.mode.into(),
+ mtime: entry.mtime.into(),
+ size: entry.size.into(),
+ },
+ ),
+ dirstate_map::NodeData::CachedDirectory { mtime } => {
+ (b'd', Entry::from_timestamp(*mtime))
}
- } else {
- OptEntry {
- state: b'\0',
- mode: 0.into(),
- mtime: 0.into(),
- size: 0.into(),
- }
- },
- },
+ dirstate_map::NodeData::None => (
+ b'\0',
+ Entry {
+ mode: 0.into(),
+ mtime: 0.into(),
+ size: 0.into(),
+ },
+ ),
+ };
+ Node {
+ children,
+ copy_source,
+ full_path,
+ base_name_start: u32::try_from(path.base_name_start())
+ // Could only panic for paths over 4 GiB
+ .expect("dirstate-v2 offset overflow")
+ .into(),
+ tracked_descendants_count: node
+ .tracked_descendants_count
+ .into(),
+ state,
+ data,
+ }
+ }
NodeRef::OnDisk(node) => Node {
children,
copy_source,
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -295,18 +295,13 @@
}
}
- pub(super) fn has_entry(&self) -> bool {
- match self {
- NodeRef::InMemory(_path, node) => node.entry.is_some(),
- NodeRef::OnDisk(node) => node.has_entry(),
- }
- }
-
pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
match self {
- NodeRef::InMemory(_path, node) => Ok(node.entry),
+ NodeRef::InMemory(_path, node) => {
+ Ok(node.data.as_entry().copied())
+ }
NodeRef::OnDisk(node) => node.entry(),
}
}
@@ -316,7 +311,7 @@
) -> Result<Option<EntryState>, DirstateV2ParseError> {
match self {
NodeRef::InMemory(_path, node) => {
- Ok(node.entry.as_ref().map(|entry| entry.state))
+ Ok(node.data.as_entry().map(|entry| entry.state))
}
NodeRef::OnDisk(node) => node.state(),
}
@@ -333,8 +328,7 @@
/// Represents a file or a directory
#[derive(Default)]
pub(super) struct Node<'on_disk> {
- /// `None` for directories
- pub(super) entry: Option<DirstateEntry>,
+ pub(super) data: NodeData,
pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
@@ -344,6 +338,34 @@
pub(super) tracked_descendants_count: u32,
}
+pub(super) enum NodeData {
+ Entry(DirstateEntry),
+ CachedDirectory { mtime: on_disk::Timestamp },
+ None,
+}
+
+impl Default for NodeData {
+ fn default() -> Self {
+ NodeData::None
+ }
+}
+
+impl NodeData {
+ fn has_entry(&self) -> bool {
+ match self {
+ NodeData::Entry(_) => true,
+ _ => false,
+ }
+ }
+
+ fn as_entry(&self) -> Option<&DirstateEntry> {
+ match self {
+ NodeData::Entry(entry) => Some(entry),
+ _ => None,
+ }
+ }
+}
+
impl<'on_disk> DirstateMap<'on_disk> {
pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
Self {
@@ -386,14 +408,14 @@
},
)?;
assert!(
- node.entry.is_none(),
+ !node.data.has_entry(),
"duplicate dirstate entry in read"
);
assert!(
node.copy_source.is_none(),
"duplicate dirstate entry in read"
);
- node.entry = Some(*entry);
+ node.data = NodeData::Entry(*entry);
node.copy_source = copy_source.map(Cow::Borrowed);
map.nodes_with_entry_count += 1;
if copy_source.is_some() {
@@ -519,10 +541,10 @@
}
},
)?;
- if node.entry.is_none() {
+ if !node.data.has_entry() {
self.nodes_with_entry_count += 1
}
- node.entry = Some(new_entry);
+ node.data = NodeData::Entry(new_entry);
Ok(())
}
@@ -587,7 +609,7 @@
&mut self.root,
path.as_ref(),
)? {
- if let Some(entry) = node.entry.as_mut() {
+ if let NodeData::Entry(entry) = &mut node.data {
entry.clear_mtime();
}
}
@@ -703,18 +725,22 @@
return Ok(None);
}
} else {
+ let had_entry = node.data.has_entry();
+ if had_entry {
+ node.data = NodeData::None
+ }
dropped = Dropped {
was_tracked: node
- .entry
- .as_ref()
+ .data
+ .as_entry()
.map_or(false, |entry| entry.state.is_tracked()),
- had_entry: node.entry.take().is_some(),
+ had_entry,
had_copy_source: node.copy_source.take().is_some(),
};
}
// After recursion, for both leaf (rest_of_path is None) nodes and
// parent nodes, remove a node if it just became empty.
- if node.entry.is_none()
+ if !node.data.has_entry()
&& node.copy_source.is_none()
&& node.children.is_empty()
{
@@ -746,7 +772,7 @@
if let Some(node) =
Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
{
- if let Some(entry) = node.entry.as_mut() {
+ if let NodeData::Entry(entry) = &mut node.data {
entry.clear_ambiguous_mtime(now);
}
}
@@ -815,7 +841,8 @@
if let Some(node) = self.get_node(directory)? {
// A node without a `DirstateEntry` was created to hold child
// nodes, and is therefore a directory.
- Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
+ let state = node.state()?;
+ Ok(state.is_none() && node.tracked_descendants_count() > 0)
} else {
Ok(false)
}
@@ -825,7 +852,7 @@
if let Some(node) = self.get_node(directory)? {
// A node without a `DirstateEntry` was created to hold child
// nodes, and is therefore a directory.
- Ok(!node.has_entry())
+ Ok(node.state()?.is_none())
} else {
Ok(false)
}
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20210601/57aa6a53/attachment-0001.html>
More information about the Mercurial-patches
mailing list