[Updated] D11655: dirstate-v2: Add storage space for nanoseconds precision in file mtimes
SimonSapin
phabricator at mercurial-scm.org
Wed Oct 13 22:29:39 UTC 2021
Closed by commit rHG308d9c245337: dirstate-v2: Add storage space for nanoseconds precision in file mtimes (authored by SimonSapin).
This revision was automatically updated to reflect the committed changes.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D11655?vs=30770&id=30795
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D11655/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D11655
AFFECTED FILES
mercurial/helptext/internals/dirstate-v2.txt
rust/hg-core/src/dirstate_tree/on_disk.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -97,7 +97,8 @@
pub(super) descendants_with_entry_count: Size,
pub(super) tracked_descendants_count: Size,
flags: Flags,
- data: Entry,
+ size: U32Be,
+ mtime: PackedTruncatedTimestamp,
}
bitflags! {
@@ -110,23 +111,14 @@
const HAS_MODE_AND_SIZE = 1 << 3;
const HAS_MTIME = 1 << 4;
const MODE_EXEC_PERM = 1 << 5;
- const MODE_IS_SYMLINK = 1 << 7;
+ const MODE_IS_SYMLINK = 1 << 6;
}
}
-#[derive(BytesCast, Copy, Clone, Debug)]
-#[repr(C)]
-struct Entry {
- _padding: U32Be,
- size: U32Be,
- mtime: U32Be,
-}
-
/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
-struct PackedTimestamp {
- _padding: U32Be,
+struct PackedTruncatedTimestamp {
truncated_seconds: U32Be,
nanoseconds: U32Be,
}
@@ -329,7 +321,7 @@
) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
Ok(
if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
- Some(self.data.as_timestamp()?)
+ Some(self.mtime.try_into()?)
} else {
None
},
@@ -356,12 +348,12 @@
let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
let p2_info = self.flags.contains(Flags::P2_INFO);
let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
- Some((self.synthesize_unix_mode(), self.data.size.into()))
+ Some((self.synthesize_unix_mode(), self.size.into()))
} else {
None
};
let mtime = if self.flags.contains(Flags::HAS_MTIME) {
- Some(self.data.mtime.into())
+ Some(self.mtime.truncated_seconds.into())
} else {
None
};
@@ -407,10 +399,10 @@
tracked_descendants_count: self.tracked_descendants_count.get(),
})
}
-}
-impl Entry {
- fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
+ fn from_dirstate_entry(
+ entry: &DirstateEntry,
+ ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
entry.v2_data();
// TODO: convert throug raw flag bits instead?
@@ -418,53 +410,26 @@
flags.set(Flags::WDIR_TRACKED, wdir_tracked);
flags.set(Flags::P1_TRACKED, p1_tracked);
flags.set(Flags::P2_INFO, p2_info);
- let (size, mtime);
- if let Some((m, s)) = mode_size_opt {
+ let size = if let Some((m, s)) = mode_size_opt {
let exec_perm = m & libc::S_IXUSR != 0;
let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
flags.set(Flags::MODE_EXEC_PERM, exec_perm);
flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
- size = s;
- flags.insert(Flags::HAS_MODE_AND_SIZE)
+ flags.insert(Flags::HAS_MODE_AND_SIZE);
+ s.into()
} else {
- size = 0;
- }
- if let Some(m) = mtime_opt {
- mtime = m;
- flags.insert(Flags::HAS_MTIME);
- } else {
- mtime = 0;
- }
- let raw_entry = Entry {
- _padding: 0.into(),
- size: size.into(),
- mtime: mtime.into(),
+ 0.into()
};
- (flags, raw_entry)
- }
-
- fn from_timestamp(timestamp: TruncatedTimestamp) -> Self {
- let packed = PackedTimestamp {
- _padding: 0.into(),
- truncated_seconds: timestamp.truncated_seconds().into(),
- nanoseconds: timestamp.nanoseconds().into(),
+ let mtime = if let Some(m) = mtime_opt {
+ flags.insert(Flags::HAS_MTIME);
+ PackedTruncatedTimestamp {
+ truncated_seconds: m.into(),
+ nanoseconds: 0.into(),
+ }
+ } else {
+ PackedTruncatedTimestamp::null()
};
- // Safety: both types implement the `ByteCast` trait, so we could
- // safely use `as_bytes` and `from_bytes` to do this conversion. Using
- // `transmute` instead makes the compiler check that the two types
- // have the same size, which eliminates the error case of
- // `from_bytes`.
- unsafe { std::mem::transmute::<PackedTimestamp, Entry>(packed) }
- }
-
- fn as_timestamp(self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
- // Safety: same as above in `from_timestamp`
- let packed =
- unsafe { std::mem::transmute::<Entry, PackedTimestamp>(self) };
- TruncatedTimestamp::from_already_truncated(
- packed.truncated_seconds.get(),
- packed.nanoseconds.get(),
- )
+ (flags, size, mtime)
}
}
@@ -610,20 +575,17 @@
};
on_disk_nodes.push(match node {
NodeRef::InMemory(path, node) => {
- let (flags, data) = match &node.data {
+ let (flags, size, mtime) = match &node.data {
dirstate_map::NodeData::Entry(entry) => {
- Entry::from_dirstate_entry(entry)
+ Node::from_dirstate_entry(entry)
}
dirstate_map::NodeData::CachedDirectory { mtime } => {
- (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
+ (Flags::HAS_MTIME, 0.into(), (*mtime).into())
}
dirstate_map::NodeData::None => (
Flags::empty(),
- Entry {
- _padding: 0.into(),
- size: 0.into(),
- mtime: 0.into(),
- },
+ 0.into(),
+ PackedTruncatedTimestamp::null(),
),
};
Node {
@@ -641,7 +603,8 @@
.tracked_descendants_count
.into(),
flags,
- data,
+ size,
+ mtime,
}
}
NodeRef::OnDisk(node) => Node {
@@ -725,3 +688,33 @@
.expect("dirstate-v2 path length overflow")
.into()
}
+
+impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
+ fn from(timestamp: TruncatedTimestamp) -> Self {
+ Self {
+ truncated_seconds: timestamp.truncated_seconds().into(),
+ nanoseconds: timestamp.nanoseconds().into(),
+ }
+ }
+}
+
+impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
+ type Error = DirstateV2ParseError;
+
+ fn try_from(
+ timestamp: PackedTruncatedTimestamp,
+ ) -> Result<Self, Self::Error> {
+ Self::from_already_truncated(
+ timestamp.truncated_seconds.get(),
+ timestamp.nanoseconds.get(),
+ )
+ }
+}
+impl PackedTruncatedTimestamp {
+ fn null() -> Self {
+ Self {
+ truncated_seconds: 0.into(),
+ nanoseconds: 0.into(),
+ }
+ }
+}
diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt
--- a/mercurial/helptext/internals/dirstate-v2.txt
+++ b/mercurial/helptext/internals/dirstate-v2.txt
@@ -372,7 +372,7 @@
This counter is used to implement `has_tracked_dir`.
* Offset 30:
- Some boolean values packed as bits of a single byte.
+ A single `flags` byte that packs some boolean values as bits.
Starting from least-significant, bit masks are::
WDIR_TRACKED = 1 << 0
@@ -381,110 +381,116 @@
HAS_MODE_AND_SIZE = 1 << 3
HAS_MTIME = 1 << 4
MODE_EXEC_PERM = 1 << 5
- MODE_IS_SYMLINK = 1 << 7
-
-
- Other bits are unset. The meaning of these bits are:
-
- `WDIR_TRACKED`
- Set if the working directory contains a tracked file at this node’s path.
- This is typically set and unset by `hg add` and `hg rm`.
-
- `P1_TRACKED`
- set if the working directory’s first parent changeset
- (whose node identifier is found in tree metadata)
- contains a tracked file at this node’s path.
- This is a cache to reduce manifest lookups.
-
- `P2_INFO`
- Set if the file has been involved in some merge operation.
- Either because it was actually merged,
- or because the version in the second parent p2 version was ahead,
- or because some rename moved it there.
- In either case `hg status` will want it displayed as modified.
+ MODE_IS_SYMLINK = 1 << 6
- Files that would be mentioned at all in the `dirstate-v1` file format
- have a node with at least one of the above three bits set in `dirstate-v2`.
- Let’s call these files "tracked anywhere",
- and "untracked" the nodes with all three of these bits unset.
- Untracked nodes are typically for directories:
- they hold child nodes and form the tree structure.
- Additional untracked nodes may also exist.
- Although implementations should strive to clean up nodes
- that are entirely unused, other untracked nodes may also exist.
- For example, a future version of Mercurial might in some cases
- add nodes for untracked files or/and ignored files in the working directory
- in order to optimize `hg status`
- by enabling it to skip `readdir` in more cases.
+ The meaning of each bit is described below.
+ Other bits are unset.
- When a node is for a file tracked anywhere:
- - If `HAS_MODE_AND_SIZE` is set, the file is expected
- to be a symbolic link or a normal file based on `MODE_IS_SYMLINK`.
- - If `HAS_MODE_AND_SIZE` is set, the file’s owner is expected
- to have execute permission or not based on `MODE_EXEC_PERM`.
- - If `HAS_MODE_AND_SIZE` is unset,
- the expected type of file and permission are unknown.
- The rest of the node data is three fields:
-
- * Offset 31:
- 4 unused bytes, set to zero
+* Offset 31:
+ A `size` field described below, as a 32-bit integer.
+ Unlike in dirstate-v1, negative values are not used.
- * Offset 35:
- If `HAS_MODE_AND_SIZE` is unset, four zero bytes.
- Otherwise, a 32-bit integer for expected size of the file
- truncated to its 31 least-significant bits.
- Unlike in dirstate-v1, negative values are not used.
-
- * Offset 39:
- If `HAS_MTIME` is unset, four zero bytes.
- Otherwise, a 32-bit integer for expected modified time of the file
- (as in `stat_result.st_mtime`),
- truncated to its 31 least-significant bits.
- Unlike in dirstate-v1, negative values are not used.
-
- If an untracked node `HAS_MTIME` *unset*, this space is unused:
-
- * Offset 31:
- 12 unused bytes, set to zero
-
- If an untracked node `HAS_MTIME` *set*,
- what follows is the modification time of a directory
- represented similarly to the C `timespec` struct:
-
- * Offset 31:
- 4 unused bytes, set to zero
+* Offset 35:
+ The seconds component of an `mtime` field described below,
+ as a 32-bit integer.
+ Unlike in dirstate-v1, negative values are not used.
- * Offset 35:
- The number of seconds elapsed since the Unix epoch,
- truncated to its lower 31 bits,
- as a 32-bit integer.
-
- * Offset 39:
- The sub-second number of nanoseconds elapsed since the Unix epoch,
- as 32-bit integer.
- Always greater than or equal to zero, and strictly less than a billion.
-
- The presence of a directory modification time means that at some point,
- this path in the working directory was observed:
-
- - To be a directory
- - With the given modification time
- - That time was already strictly in the past when observed,
- meaning that later changes cannot happen in the same clock tick
- and must cause a different modification time
- (unless the system clock jumps back and we get unlucky,
- which is not impossible but deemed unlikely enough).
- - All direct children of this directory
- (as returned by `std::fs::read_dir`)
- either have a corresponding dirstate node,
- or are ignored by ignore patterns whose hash is in tree metadata.
-
- This means that if `std::fs::symlink_metadata` later reports
- the same modification time
- and ignored patterns haven’t changed,
- a run of status that is not listing ignored files
- can skip calling `std::fs::read_dir` again for this directory,
- and iterate child dirstate nodes instead.
-
+* Offset 39:
+ The nanoseconds component of an `mtime` field described below,
+ as a 32-bit integer.
* (Offset 43: end of this node)
+
+The meaning of the boolean values packed in `flags` is:
+
+`WDIR_TRACKED`
+ Set if the working directory contains a tracked file at this node’s path.
+ This is typically set and unset by `hg add` and `hg rm`.
+
+`P1_TRACKED`
+ Set if the working directory’s first parent changeset
+ (whose node identifier is found in tree metadata)
+ contains a tracked file at this node’s path.
+ This is a cache to reduce manifest lookups.
+
+`P2_INFO`
+ Set if the file has been involved in some merge operation.
+ Either because it was actually merged,
+ or because the version in the second parent p2 was ahead,
+ or because some rename moved it there.
+ In either case `hg status` will want it displayed as modified.
+
+Files that would be mentioned at all in the `dirstate-v1` file format
+have a node with at least one of the above three bits set in `dirstate-v2`.
+Let’s call these files "tracked anywhere",
+and "untracked" the nodes with all three of these bits unset.
+Untracked nodes are typically for directories:
+they hold child nodes and form the tree structure.
+Additional untracked nodes may also exist.
+Although implementations should strive to clean up nodes
+that are entirely unused, other untracked nodes may also exist.
+For example, a future version of Mercurial might in some cases
+add nodes for untracked files or/and ignored files in the working directory
+in order to optimize `hg status`
+by enabling it to skip `readdir` in more cases.
+
+`HAS_MODE_AND_SIZE`
+ Must be unset for untracked nodes.
+ For files tracked anywhere, if this is set:
+ - The `size` field is the expected file size,
+ in bytes truncated to its lower 31 bits,
+ for the file to be clean.
+ - The expected execute permission for the file’s owner
+ is given by `MODE_EXEC_PERM`
+ - The expected file type is given by `MODE_IS_SYMLINK`:
+ a symbolic link if set, or a normal file if unset.
+ If this is unset the expected size, permission, and file type are unknown.
+ The `size` field is unused (set to zero).
+
+`HAS_MTIME`
+ If unset, the `mtime` field is unused (set to zero).
+ If set, it contains a timestamp represented as
+ - the number of seconds since the Unix epoch,
+ truncated to its lower 31 bits.
+ - and the number of nanoseconds since `mtime.seconds`,
+ always strictly less than one billion.
+ This may be zero if more precision is not available.
+ (This can happen because of limitations in any of Mercurial, Python,
+ libc, the operating system, …)
+
+ If set for a file tracked anywhere,
+ `mtime` is the expected modification time for the file to be clean.
+
+ If set for an untracked node, at some point,
+ this path in the working directory was observed:
+
+ - To be a directory
+ - With the modification time given in `mtime`
+ - That time was already strictly in the past when observed,
+ meaning that later changes cannot happen in the same clock tick
+ and must cause a different modification time
+ (unless the system clock jumps back and we get unlucky,
+ which is not impossible but deemed unlikely enough).
+ - All direct children of this directory
+ (as returned by `std::fs::read_dir`)
+ either have a corresponding dirstate node,
+ or are ignored by ignore patterns whose hash is in tree metadata.
+
+ This means that if `std::fs::symlink_metadata` later reports
+ the same modification time
+ and ignored patterns haven’t changed,
+ a run of status that is not listing ignored files
+ can skip calling `std::fs::read_dir` again for this directory,
+ and iterate child dirstate nodes instead.
+
+`MODE_EXEC_PERM`
+ Must be unset if `HAS_MODE_AND_SIZE` is unset.
+ If `HAS_MODE_AND_SIZE` is set,
+ this indicates whether the file’s owner is expected
+ to have execute permission.
+
+`MODE_IS_SYMLINK`
+ Must be unset if `HAS_MODE_AND_SIZE` is unset.
+ If `HAS_MODE_AND_SIZE` is set,
+ this indicates whether the file is expected to be a symlink
+ as opposed to a normal file.
To: SimonSapin, #hg-reviewers, pulkit
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211013/d404e346/attachment-0002.html>
More information about the Mercurial-patches
mailing list