[Updated] D11655: dirstate-v2: Add storage space for nanoseconds precision in file mtimes

SimonSapin phabricator at mercurial-scm.org
Wed Oct 13 22:29:39 UTC 2021


Closed by commit rHG308d9c245337: dirstate-v2: Add storage space for nanoseconds precision in file mtimes (authored by SimonSapin).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D11655?vs=30770&id=30795

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D11655/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D11655

AFFECTED FILES
  mercurial/helptext/internals/dirstate-v2.txt
  rust/hg-core/src/dirstate_tree/on_disk.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -97,7 +97,8 @@
     pub(super) descendants_with_entry_count: Size,
     pub(super) tracked_descendants_count: Size,
     flags: Flags,
-    data: Entry,
+    size: U32Be,
+    mtime: PackedTruncatedTimestamp,
 }
 
 bitflags! {
@@ -110,23 +111,14 @@
         const HAS_MODE_AND_SIZE = 1 << 3;
         const HAS_MTIME = 1 << 4;
         const MODE_EXEC_PERM = 1 << 5;
-        const MODE_IS_SYMLINK = 1 << 7;
+        const MODE_IS_SYMLINK = 1 << 6;
     }
 }
 
-#[derive(BytesCast, Copy, Clone, Debug)]
-#[repr(C)]
-struct Entry {
-    _padding: U32Be,
-    size: U32Be,
-    mtime: U32Be,
-}
-
 /// Duration since the Unix epoch
 #[derive(BytesCast, Copy, Clone)]
 #[repr(C)]
-struct PackedTimestamp {
-    _padding: U32Be,
+struct PackedTruncatedTimestamp {
     truncated_seconds: U32Be,
     nanoseconds: U32Be,
 }
@@ -329,7 +321,7 @@
     ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
         Ok(
             if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
-                Some(self.data.as_timestamp()?)
+                Some(self.mtime.try_into()?)
             } else {
                 None
             },
@@ -356,12 +348,12 @@
         let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
         let p2_info = self.flags.contains(Flags::P2_INFO);
         let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
-            Some((self.synthesize_unix_mode(), self.data.size.into()))
+            Some((self.synthesize_unix_mode(), self.size.into()))
         } else {
             None
         };
         let mtime = if self.flags.contains(Flags::HAS_MTIME) {
-            Some(self.data.mtime.into())
+            Some(self.mtime.truncated_seconds.into())
         } else {
             None
         };
@@ -407,10 +399,10 @@
             tracked_descendants_count: self.tracked_descendants_count.get(),
         })
     }
-}
 
-impl Entry {
-    fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
+    fn from_dirstate_entry(
+        entry: &DirstateEntry,
+    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
         let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
             entry.v2_data();
         // TODO: convert throug raw flag bits instead?
@@ -418,53 +410,26 @@
         flags.set(Flags::WDIR_TRACKED, wdir_tracked);
         flags.set(Flags::P1_TRACKED, p1_tracked);
         flags.set(Flags::P2_INFO, p2_info);
-        let (size, mtime);
-        if let Some((m, s)) = mode_size_opt {
+        let size = if let Some((m, s)) = mode_size_opt {
             let exec_perm = m & libc::S_IXUSR != 0;
             let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
             flags.set(Flags::MODE_EXEC_PERM, exec_perm);
             flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
-            size = s;
-            flags.insert(Flags::HAS_MODE_AND_SIZE)
+            flags.insert(Flags::HAS_MODE_AND_SIZE);
+            s.into()
         } else {
-            size = 0;
-        }
-        if let Some(m) = mtime_opt {
-            mtime = m;
-            flags.insert(Flags::HAS_MTIME);
-        } else {
-            mtime = 0;
-        }
-        let raw_entry = Entry {
-            _padding: 0.into(),
-            size: size.into(),
-            mtime: mtime.into(),
+            0.into()
         };
-        (flags, raw_entry)
-    }
-
-    fn from_timestamp(timestamp: TruncatedTimestamp) -> Self {
-        let packed = PackedTimestamp {
-            _padding: 0.into(),
-            truncated_seconds: timestamp.truncated_seconds().into(),
-            nanoseconds: timestamp.nanoseconds().into(),
+        let mtime = if let Some(m) = mtime_opt {
+            flags.insert(Flags::HAS_MTIME);
+            PackedTruncatedTimestamp {
+                truncated_seconds: m.into(),
+                nanoseconds: 0.into(),
+            }
+        } else {
+            PackedTruncatedTimestamp::null()
         };
-        // Safety: both types implement the `ByteCast` trait, so we could
-        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
-        // `transmute` instead makes the compiler check that the two types
-        // have the same size, which eliminates the error case of
-        // `from_bytes`.
-        unsafe { std::mem::transmute::<PackedTimestamp, Entry>(packed) }
-    }
-
-    fn as_timestamp(self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
-        // Safety: same as above in `from_timestamp`
-        let packed =
-            unsafe { std::mem::transmute::<Entry, PackedTimestamp>(self) };
-        TruncatedTimestamp::from_already_truncated(
-            packed.truncated_seconds.get(),
-            packed.nanoseconds.get(),
-        )
+        (flags, size, mtime)
     }
 }
 
@@ -610,20 +575,17 @@
             };
             on_disk_nodes.push(match node {
                 NodeRef::InMemory(path, node) => {
-                    let (flags, data) = match &node.data {
+                    let (flags, size, mtime) = match &node.data {
                         dirstate_map::NodeData::Entry(entry) => {
-                            Entry::from_dirstate_entry(entry)
+                            Node::from_dirstate_entry(entry)
                         }
                         dirstate_map::NodeData::CachedDirectory { mtime } => {
-                            (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
+                            (Flags::HAS_MTIME, 0.into(), (*mtime).into())
                         }
                         dirstate_map::NodeData::None => (
                             Flags::empty(),
-                            Entry {
-                                _padding: 0.into(),
-                                size: 0.into(),
-                                mtime: 0.into(),
-                            },
+                            0.into(),
+                            PackedTruncatedTimestamp::null(),
                         ),
                     };
                     Node {
@@ -641,7 +603,8 @@
                             .tracked_descendants_count
                             .into(),
                         flags,
-                        data,
+                        size,
+                        mtime,
                     }
                 }
                 NodeRef::OnDisk(node) => Node {
@@ -725,3 +688,33 @@
         .expect("dirstate-v2 path length overflow")
         .into()
 }
+
+impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
+    fn from(timestamp: TruncatedTimestamp) -> Self {
+        Self {
+            truncated_seconds: timestamp.truncated_seconds().into(),
+            nanoseconds: timestamp.nanoseconds().into(),
+        }
+    }
+}
+
+impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
+    type Error = DirstateV2ParseError;
+
+    fn try_from(
+        timestamp: PackedTruncatedTimestamp,
+    ) -> Result<Self, Self::Error> {
+        Self::from_already_truncated(
+            timestamp.truncated_seconds.get(),
+            timestamp.nanoseconds.get(),
+        )
+    }
+}
+impl PackedTruncatedTimestamp {
+    fn null() -> Self {
+        Self {
+            truncated_seconds: 0.into(),
+            nanoseconds: 0.into(),
+        }
+    }
+}
diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt
--- a/mercurial/helptext/internals/dirstate-v2.txt
+++ b/mercurial/helptext/internals/dirstate-v2.txt
@@ -372,7 +372,7 @@
   This counter is used to implement `has_tracked_dir`.
 
 * Offset 30:
-  Some boolean values packed as bits of a single byte.
+  A single `flags` byte that packs some boolean values as bits.
   Starting from least-significant, bit masks are::
 
     WDIR_TRACKED = 1 << 0
@@ -381,110 +381,116 @@
     HAS_MODE_AND_SIZE = 1 << 3
     HAS_MTIME = 1 << 4
     MODE_EXEC_PERM = 1 << 5
-    MODE_IS_SYMLINK = 1 << 7
-
-
-  Other bits are unset. The meaning of these bits are:
-
-  `WDIR_TRACKED`
-      Set if the working directory contains a tracked file at this node’s path.
-      This is typically set and unset by `hg add` and `hg rm`.
-
-  `P1_TRACKED`
-      set if the working directory’s first parent changeset
-      (whose node identifier is found in tree metadata)
-      contains a tracked file at this node’s path.
-      This is a cache to reduce manifest lookups.
-
-  `P2_INFO`
-      Set if the file has been involved in some merge operation.
-      Either because it was actually merged,
-      or because the version in the second parent p2 version was ahead,
-      or because some rename moved it there.
-      In either case `hg status` will want it displayed as modified.
+    MODE_IS_SYMLINK = 1 << 6
 
-  Files that would be mentioned at all in the `dirstate-v1` file format
-  have a node with at least one of the above three bits set in `dirstate-v2`.
-  Let’s call these files "tracked anywhere",
-  and "untracked" the nodes with all three of these bits unset.
-  Untracked nodes are typically for directories:
-  they hold child nodes and form the tree structure.
-  Additional untracked nodes may also exist.
-  Although implementations should strive to clean up nodes
-  that are entirely unused, other untracked nodes may also exist.
-  For example, a future version of Mercurial might in some cases
-  add nodes for untracked files or/and ignored files in the working directory
-  in order to optimize `hg status`
-  by enabling it to skip `readdir` in more cases.
+  The meaning of each bit is described below.
+  Other bits are unset.
 
-  When a node is for a file tracked anywhere:
-  - If `HAS_MODE_AND_SIZE` is set, the file is expected
-    to be a symbolic link or a normal file based on `MODE_IS_SYMLINK`.
-  - If `HAS_MODE_AND_SIZE` is set, the file’s owner is expected
-    to have execute permission or not based on `MODE_EXEC_PERM`.
-  - If `HAS_MODE_AND_SIZE` is unset,
-    the expected type of file and permission are unknown.
-  The rest of the node data is three fields:
-
-  * Offset 31:
-    4 unused bytes, set to zero
+* Offset 31:
+  A `size` field described below, as a 32-bit integer.
+  Unlike in dirstate-v1, negative values are not used.
 
-  * Offset 35:
-    If `HAS_MODE_AND_SIZE` is unset, four zero bytes.
-    Otherwise, a 32-bit integer for expected size of the file
-    truncated to its 31 least-significant bits.
-    Unlike in dirstate-v1, negative values are not used.
-
-  * Offset 39:
-    If `HAS_MTIME` is unset, four zero bytes.
-    Otherwise, a 32-bit integer for expected modified time of the file
-    (as in `stat_result.st_mtime`),
-    truncated to its 31 least-significant bits.
-    Unlike in dirstate-v1, negative values are not used.
-
-  If an untracked node `HAS_MTIME` *unset*, this space is unused:
-
-  * Offset 31:
-    12 unused bytes, set to zero
-
-  If an untracked node `HAS_MTIME` *set*,
-  what follows is the modification time of a directory
-  represented similarly to the C `timespec` struct:
-
-  * Offset 31:
-    4 unused bytes, set to zero
+* Offset 35:
+  The seconds component of an `mtime` field described below,
+  as a 32-bit integer.
+  Unlike in dirstate-v1, negative values are not used.
 
-  * Offset 35:
-    The number of seconds elapsed since the Unix epoch,
-    truncated to its lower 31 bits,
-    as a 32-bit integer.
-
-  * Offset 39:
-    The sub-second number of nanoseconds elapsed since the Unix epoch,
-    as 32-bit integer.
-    Always greater than or equal to zero, and strictly less than a billion.
-
-  The presence of a directory modification time means that at some point,
-  this path in the working directory was observed:
-
-  - To be a directory
-  - With the given modification time
-  - That time was already strictly in the past when observed,
-    meaning that later changes cannot happen in the same clock tick
-    and must cause a different modification time
-    (unless the system clock jumps back and we get unlucky,
-    which is not impossible but deemed unlikely enough).
-  - All direct children of this directory
-    (as returned by `std::fs::read_dir`)
-    either have a corresponding dirstate node,
-    or are ignored by ignore patterns whose hash is in tree metadata.
-
-  This means that if `std::fs::symlink_metadata` later reports
-  the same modification time
-  and ignored patterns haven’t changed,
-  a run of status that is not listing ignored files
-  can skip calling `std::fs::read_dir` again for this directory,
-  and iterate child dirstate nodes instead.
-
+* Offset 39:
+  The nanoseconds component of an `mtime` field described below,
+  as a 32-bit integer.
 
 * (Offset 43: end of this node)
+
+The meaning of the boolean values packed in `flags` is:
+
+`WDIR_TRACKED`
+    Set if the working directory contains a tracked file at this node’s path.
+    This is typically set and unset by `hg add` and `hg rm`.
+
+`P1_TRACKED`
+    Set if the working directory’s first parent changeset
+    (whose node identifier is found in tree metadata)
+    contains a tracked file at this node’s path.
+    This is a cache to reduce manifest lookups.
+
+`P2_INFO`
+    Set if the file has been involved in some merge operation.
+    Either because it was actually merged,
+    or because the version in the second parent (p2) was ahead,
+    or because some rename moved it there.
+    In any case `hg status` will want it displayed as modified.
+
+Files that would be mentioned at all in the `dirstate-v1` file format
+have a node with at least one of the above three bits set in `dirstate-v2`.
+Let’s call these files "tracked anywhere",
+and "untracked" the nodes with all three of these bits unset.
+Untracked nodes are typically for directories:
+they hold child nodes and form the tree structure.
+Additional untracked nodes may also exist.
+Although implementations should strive to clean up nodes
+that are entirely unused, other untracked nodes may also exist.
+For example, a future version of Mercurial might in some cases
+add nodes for untracked files or/and ignored files in the working directory
+in order to optimize `hg status`
+by enabling it to skip `readdir` in more cases.
+
+`HAS_MODE_AND_SIZE`
+    Must be unset for untracked nodes.
+    For files tracked anywhere, if this is set:
+    - The `size` field is the expected file size,
+      in bytes truncated to its lower 31 bits,
+      for the file to be clean.
+    - The expected execute permission for the file’s owner
+      is given by `MODE_EXEC_PERM`
+    - The expected file type is given by `MODE_IS_SYMLINK`:
+      a symbolic link if set, or a normal file if unset.
+    If this is unset the expected size, permission, and file type are unknown.
+    The `size` field is unused (set to zero).
+
+`HAS_MTIME`
+    If unset, the `mtime` field is unused (set to zero).
+    If set, it contains a timestamp represented as
+    - the number of seconds since the Unix epoch,
+      truncated to its lower 31 bits.
+    - and the number of nanoseconds since `mtime.seconds`,
+      always strictly less than one billion.
+      This may be zero if more precision is not available.
+      (This can happen because of limitations in any of Mercurial, Python,
+      libc, the operating system, …)
+
+    If set for a file tracked anywhere,
+    `mtime` is the expected modification time for the file to be clean.
+
+    If set for an untracked node, at some point,
+    this path in the working directory was observed:
+
+    - To be a directory
+    - With the modification time given in `mtime`
+    - That time was already strictly in the past when observed,
+      meaning that later changes cannot happen in the same clock tick
+      and must cause a different modification time
+      (unless the system clock jumps back and we get unlucky,
+      which is not impossible but deemed unlikely enough).
+    - All direct children of this directory
+      (as returned by `std::fs::read_dir`)
+      either have a corresponding dirstate node,
+      or are ignored by ignore patterns whose hash is in tree metadata.
+
+    This means that if `std::fs::symlink_metadata` later reports
+    the same modification time
+    and ignored patterns haven’t changed,
+    a run of status that is not listing ignored files
+    can skip calling `std::fs::read_dir` again for this directory,
+    and iterate child dirstate nodes instead.
+
+`MODE_EXEC_PERM`
+    Must be unset if `HAS_MODE_AND_SIZE` is unset.
+    If `HAS_MODE_AND_SIZE` is set,
+    this indicates whether the file’s owner is expected
+    to have execute permission.
+
+`MODE_IS_SYMLINK`
+    Must be unset if `HAS_MODE_AND_SIZE` is unset.
+    If `HAS_MODE_AND_SIZE` is set,
+    this indicates whether the file is expected to be a symlink
+    as opposed to a normal file.



To: SimonSapin, #hg-reviewers, pulkit
Cc: mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20211013/d404e346/attachment-0002.html>


More information about the Mercurial-patches mailing list