D10920: dirstate-v2: Use 32-bit integers instead of 64-bit for offsets

SimonSapin phabricator at mercurial-scm.org
Fri Jul 2 12:36:35 UTC 2021


SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This saves 12 bytes per node (each node represents a file or a directory)
  and 4 bytes in the file header.
  
  The affected fields are offsets to other parts of the file. Using 32 bits
  for them is only a limitation for a `.hg/dirstate` file larger than 4 GiB,
  which would only happen for a repository with tens of millions of files and
  directories.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10920

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/on_disk.rs
  tests/test-hgignore.t

CHANGE DETAILS

diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t
--- a/tests/test-hgignore.t
+++ b/tests/test-hgignore.t
@@ -406,19 +406,19 @@
 #if dirstate-v2
 
 Check the hash of ignore patterns written in the dirstate at offset
-12 + 20 + 20 + 8 + 4 + 4 + 4 = 72
+12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
 
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
   6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
 
   $ echo rel > .hg/testhgignorerel
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
   dea19cc7119213f24b6b582a4bae7b0cb063e34e
 
 #endif
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -17,7 +17,7 @@
 use crate::DirstateError;
 use crate::DirstateParents;
 use crate::EntryState;
-use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
+use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
 use bytes_cast::BytesCast;
 use std::borrow::Cow;
 use std::convert::TryFrom;
@@ -135,9 +135,8 @@
 
 /// Counted in bytes from the start of the file
 ///
-/// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
-/// we could save space by using `U32Be` instead.
-type Offset = U64Be;
+/// NOTE: `.hg/dirstate` files larger than 4 GiB are not supported.
+type Offset = U32Be;
 
 /// Counted in number of items
 ///
@@ -172,8 +171,8 @@
 
 /// Make sure that size-affecting changes are made knowingly
 fn _static_assert_size_of() {
-    let _ = std::mem::transmute::<Header, [u8; 92]>;
-    let _ = std::mem::transmute::<Node, [u8; 57]>;
+    let _ = std::mem::transmute::<Header, [u8; 88]>;
+    let _ = std::mem::transmute::<Node, [u8; 45]>;
 }
 
 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
@@ -589,8 +588,8 @@
 where
     T: BytesCast,
 {
-    let start = u64::try_from(out.len())
-        // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
+    let start = u32::try_from(out.len())
+        // Could only panic for a dirstate file larger than 4 GiB
         .expect("dirstate-v2 offset overflow")
         .into();
     let len = u32::try_from(slice.len())



To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list