D9278: transaction: split new files into a separate set

joerg.sonnenberger (Joerg Sonnenberger) phabricator at mercurial-scm.org
Sat Nov 7 21:32:23 UTC 2020


joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Journal entries with size 0 are common as they represent new revlog
  files. Move them from the dictionary into a set, as a set is more
  compact. This reduces peak RSS by 70MB for the NetBSD test repository with
  around 450k files under .hg/store.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9278

AFFECTED FILES
  mercurial/repair.py
  mercurial/transaction.py

CHANGE DETAILS

diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -159,6 +159,7 @@
         self._vfsmap = vfsmap
         self._after = after
         self._offsetmap = {}
+        self._newfiles = set()
         self._journal = journalname
         self._undoname = undoname
         self._queue = []
@@ -248,7 +249,11 @@
     @active
     def add(self, file, offset):
         """record the state of an append-only file before update"""
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
         if self._queue:
             self._queue[-1].append((file, offset))
@@ -258,9 +263,16 @@
 
     def _addentry(self, file, offset):
         """add a append-only entry to memory and on-disk state"""
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
-        self._offsetmap[file] = offset
+        if offset:
+            self._offsetmap[file] = offset
+        else:
+            self._newfiles.add(file)
         # add enough data to the journal to do the truncate
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
@@ -280,7 +292,11 @@
             msg = b'cannot use transaction.addbackup inside "group"'
             raise error.ProgrammingError(msg)
 
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
         vfs = self._vfsmap[location]
         dirname, filename = vfs.split(file)
@@ -394,6 +410,8 @@
 
     @active
     def findoffset(self, file):
+        if file in self._newfiles:
+            return 0
         return self._offsetmap.get(file)
 
     @active
@@ -402,10 +420,19 @@
         replace can only replace already committed entries
         that are not pending in the queue
         '''
-
-        if file not in self._offsetmap:
+        if file in self._newfiles:
+            if not offset:
+                return
+            self._newfiles.remove(file)
+            self._offsetmap[file] = offset
+        elif file in self._offsetmap:
+            if not offset:
+                del self._offsetmap[file]
+                self._newfiles.add(file)
+            else:
+                self._offsetmap[file] = offset
+        else:
             raise KeyError(file)
-        self._offsetmap[file] = offset
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
 
@@ -546,6 +573,7 @@
                         b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
                     )
         self._offsetmap = {}
+        self._newfiles = set()
         self._writeundo()
         if self._after:
             self._after()
@@ -627,10 +655,12 @@
         self._file.close()
         self._backupsfile.close()
         entries = list(pycompat.iteritems(self._offsetmap))
+        for file in self._newfiles:
+            entries.append((file, 0))
         entries.sort()
 
         try:
-            if not self._offsetmap and not self._backupentries:
+            if not entries and not self._backupentries:
                 if self._backupjournal:
                     self._opener.unlink(self._backupjournal)
                 if self._journal:
diff --git a/mercurial/repair.py b/mercurial/repair.py
--- a/mercurial/repair.py
+++ b/mercurial/repair.py
@@ -210,6 +210,7 @@
                 # using append-only files. We'll need some kind of storage
                 # API to handle stripping for us.
                 oldfiles = set(tr._offsetmap.keys())
+                oldfiles.update(tr._newfiles)
 
                 tr.startgroup()
                 cl.strip(striprev, tr)
@@ -220,6 +221,7 @@
                 tr.endgroup()
 
                 newfiles = set(tr._offsetmap.keys())
+                newfiles.update(tr._newfiles)
                 newfiles.difference_update(oldfiles)
 
                 # The processing order doesn't matter during normal



To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-devel mailing list