D9278: transaction: split new files into a separate set
joerg.sonnenberger (Joerg Sonnenberger)
phabricator at mercurial-scm.org
Sat Nov 7 21:32:23 UTC 2020
joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
Journal entries with size 0 are common as they represent new revlog
files. Move them from the dictionary into a set as the latter is more
dense. This reduces peak RSS by 70MB for the NetBSD test repository with
around 450k files under .hg/store.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D9278
AFFECTED FILES
mercurial/repair.py
mercurial/transaction.py
CHANGE DETAILS
diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -159,6 +159,7 @@
self._vfsmap = vfsmap
self._after = after
self._offsetmap = {}
+ self._newfiles = set()
self._journal = journalname
self._undoname = undoname
self._queue = []
@@ -248,7 +249,11 @@
@active
def add(self, file, offset):
"""record the state of an append-only file before update"""
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
if self._queue:
self._queue[-1].append((file, offset))
@@ -258,9 +263,16 @@
def _addentry(self, file, offset):
"""add a append-only entry to memory and on-disk state"""
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
- self._offsetmap[file] = offset
+ if offset:
+ self._offsetmap[file] = offset
+ else:
+ self._newfiles.add(file)
# add enough data to the journal to do the truncate
self._file.write(b"%s\0%d\n" % (file, offset))
self._file.flush()
@@ -280,7 +292,11 @@
msg = b'cannot use transaction.addbackup inside "group"'
raise error.ProgrammingError(msg)
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
vfs = self._vfsmap[location]
dirname, filename = vfs.split(file)
@@ -394,6 +410,8 @@
@active
def findoffset(self, file):
+ if file in self._newfiles:
+ return 0
return self._offsetmap.get(file)
@active
@@ -402,10 +420,19 @@
replace can only replace already committed entries
that are not pending in the queue
'''
-
- if file not in self._offsetmap:
+ if file in self._newfiles:
+ if not offset:
+ return
+ self._newfiles.remove(file)
+ self._offsetmap[file] = offset
+ elif file in self._offsetmap:
+ if not offset:
+ del self._offsetmap[file]
+ self._newfiles.add(file)
+ else:
+ self._offsetmap[file] = offset
+ else:
raise KeyError(file)
- self._offsetmap[file] = offset
self._file.write(b"%s\0%d\n" % (file, offset))
self._file.flush()
@@ -546,6 +573,7 @@
b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
)
self._offsetmap = {}
+ self._newfiles = set()
self._writeundo()
if self._after:
self._after()
@@ -627,10 +655,12 @@
self._file.close()
self._backupsfile.close()
entries = list(pycompat.iteritems(self._offsetmap))
+ for file in self._newfiles:
+ entries.append((file, 0))
entries.sort()
try:
- if not self._offsetmap and not self._backupentries:
+ if not entries and not self._backupentries:
if self._backupjournal:
self._opener.unlink(self._backupjournal)
if self._journal:
diff --git a/mercurial/repair.py b/mercurial/repair.py
--- a/mercurial/repair.py
+++ b/mercurial/repair.py
@@ -210,6 +210,7 @@
# using append-only files. We'll need some kind of storage
# API to handle stripping for us.
oldfiles = set(tr._offsetmap.keys())
+ oldfiles.update(tr._newfiles)
tr.startgroup()
cl.strip(striprev, tr)
@@ -220,6 +221,7 @@
tr.endgroup()
newfiles = set(tr._offsetmap.keys())
+ newfiles.update(tr._newfiles)
newfiles.difference_update(oldfiles)
# The processing order doesn't matter during normal
To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list