[PATCH 2 of 2] hgext: add a new packed repository extension, packrepo
Bryan O'Sullivan
bos at serpentine.com
Wed Jun 27 17:37:57 UTC 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1340818576 25200
# Node ID 989483d028d1881eb775815c9d66361f9bed4d06
# Parent 080b8d275cfe8f355fbe778a175a1c4cf1e08083
hgext: add a new packed repository extension, packrepo
diff --git a/hgext/packrepo.py b/hgext/packrepo.py
new file mode 100644
--- /dev/null
+++ b/hgext/packrepo.py
@@ -0,0 +1,404 @@
+# packrepo.py - packed repository support for mercurial
+#
+# Copyright 2012 Facebook
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''pack small metadata files to reduce disk seeks
+
+Mercurial's revlog storage format uses a large number of files, many
+of which are small. In a repository that contains many thousands of
+revlog files, the disk seeks associated with reading or writing these
+files can be expensive.
+
+This extension is aimed at a narrow use case: improving the
+performance of "hg clone --uncompressed" for a repository that
+contains many revlogs, over a fast LAN or WAN.
+
+In these cases, the performance of "hg clone --uncompressed" can be
+limited by the seek rate of the client's disk. Use of this extension
+can improve performance by a factor of 10. (Please be sure to measure
+performance in your own environment before you decide to use this
+extension.)
+
+A repository created using "hg clone" without "--uncompressed" will
+not be packed (supporting this correctly is too complex, and seeks
+are less of a factor); neither will a repository created from scratch.
+
+When a repository is first cloned with "--uncompressed", this
+extension redirects writes of small revlogs to a single immutable pack
+file. Large revlog files do not benefit from being packed, and are
+written normally. Afterwards, metadata reads are transparently
+serviced from either the pack file or a normal revlog file. When
+Mercurial needs to write to a packed revlog, it is first copied out to
+a normal revlog file.
+
+To find out whether a repository is packed (and if so, how much is
+still packed), run "hg packinfo".
+
+To repack all revlog files in an already packed repository, use "hg
+repack". An existing repository can be converted to packed form using
+"hg repack --convert".
+'''
+
+from mercurial import error, extensions, localrepo, scmutil, store, util
+from mercurial import cmdutil
+from mercurial.i18n import _
+import errno, os, struct
+
+ at extensions.replaceclass(localrepo, 'localrepository')
+class packrepo(localrepo.localrepository):
+ def __init__(self, baseui, path=None, create=False):
+ super(packrepo, self).__init__(baseui, path, create)
+ if 'packrepo' in self.requirements:
+ self._replacestore()
+
+ def _replacestore(self, newopener=None):
+ def opener(path, audit=True):
+ if newopener is None:
+ op = packopener(path, audit)
+ op.readindex()
+ else:
+ op = newopener
+ self._packopener = op
+ return op
+ mode = self.store.createmode
+ self.store = store.store(self.requirements, self.sharedpath, opener)
+ self.store._packopener = self._packopener
+ self.sopener = self.store.opener
+ self.sopener.createmode = self.store.createmode = mode
+
+ def _makepacked(self):
+ if 'packrepo' in self.requirements:
+ return
+ self._replacestore()
+ reqs = set(self.requirements)
+ reqs.add('packrepo')
+ self._applyrequirements(reqs)
+ self._writerequirements()
+
+ def stream_in(self, remote, requirements):
+ pack = self.ui.configbool('format', 'pack', True)
+ if pack:
+ self._makepacked()
+ self._packopener.packstart()
+ try:
+ return super(packrepo, self).stream_in(remote, requirements)
+ finally:
+ if pack:
+ self._packopener.packend()
+
+ def packiter(self, label=None):
+ df = list(self.store.datafiles())
+ total = len(df)
+ progress = self.ui.progress
+ try:
+ lookup = self._packopener._index.get
+ except AttributeError:
+ lookup = {}.get
+ for i, (f, f2, size) in enumerate(df):
+ if label:
+ progress(label, i, total=total)
+ sl = lookup(f2)
+ if sl:
+ start = sl[0]
+ else:
+ start = None
+ yield f, f2, start, size
+
+ def repack(self):
+ if self.revs('draft()'):
+ pass # raise util.Abort(_('cannot repack with draft commits present'))
+ df = list(self.store.datafiles())
+ total = len(df)
+ read = self._packopener.read
+ opener = packopener(self.store.path)
+ unlink = []
+ try:
+ opener.packstart()
+ packed = 0
+ unpacked = 0
+ for (f, f2, start, size) in self.packiter(_('repacking')):
+ if size < opener._maxpacksize:
+ opener.write(f2, read(f2))
+ packed += size
+ unlink.append(self.sjoin(f))
+ else:
+ unpacked += size
+ except:
+ opener.packabort()
+ raise
+ opener.packend()
+ self._replacestore(opener)
+ unlinked = 0
+ progress = self.ui.progress
+ unlinking = _('unlinking')
+ total = len(unlink)
+ for i, f in enumerate(unlink):
+ progress(unlinking, i, total=total)
+ try:
+ os.unlink(f)
+ unlinked += 1
+ except OSError, err:
+ if err.errno != errno.ENOENT:
+ raise
+ self.ui.status(_('%s packed, %s untouched, %d unlinked\n') %
+ (util.bytecount(packed), util.bytecount(unpacked),
+ unlinked))
+
+cls = localrepo.localrepository
+for reqs in 'supported openerreqs'.split():
+ getattr(cls, reqs).add('packrepo')
+
+class writefile(object):
+ def __init__(self, path, closed):
+ self.path = path
+ self.written = []
+ self.write = self.written.append
+ self._closed = closed
+
+ def close(self):
+ self._closed(self.path, self.written)
+
+class readfile(object):
+ def __init__(self, fp, path, start, len):
+ self._fp = fp
+ self.path = path
+ self._start = start
+ self._len = len
+ self._pos = 0
+
+ def read(self, count=None):
+ self._fp.seek(self._start + self._pos)
+ if count is None:
+ return self._fp.read(self._len - self._pos)
+ return self._fp.read(min(count, self._len - self._pos))
+
+ def seek(self, offset, whence=0):
+ if whence == 2:
+ assert offset == 0
+ offset = self._len
+ npos = min(offset, self._len)
+ self._fp.seek(self._start + npos)
+ self._pos = npos
+
+ def tell(self):
+ return self._pos
+
+ def close(self):
+ pass
+
+_copylist = ['store/00pack.dat', 'store/00pack.idx']
+
+ at extensions.replaceclass(store, 'basicstore')
+class packbasicstore(store.basicstore):
+ def copylist(self):
+ return _copylist + super(packbasicstore, self).copylist()
+
+ def _walk(self, relpath, recurse):
+ l = super(packbasicstore, self)._walk(relpath, recurse)
+ try:
+ ll = [(store.decodedir(n), n, f[1])
+ for n, f in self._packopener._index.iteritems()]
+ if ll:
+ l.extend(ll)
+ l.sort()
+ except AttributeError:
+ pass
+ return l
+
+ at extensions.replaceclass(store, 'encodedstore')
+class packencodedstore(store.encodedstore):
+ def copylist(self):
+ return _copylist + super(packencodedstore, self).copylist()
+
+ at extensions.replaceclass(store, 'fncachestore')
+class packfncachestore(store.fncachestore):
+ def copylist(self):
+ return _copylist + super(packfncachestore, self).copylist()
+
+ def getsize(self, path):
+ try:
+ return self.opener.opener._index[path][1]
+ except (AttributeError, KeyError):
+ return super(packfncachestore, self).getsize(path)
+
+class packopener(scmutil.abstractopener):
+ _maxpacksize = 128 * 1024
+ _idxformat = '>HIIH'
+ _hdrformat = '>H'
+
+ def __init__(self, path, audit=True):
+ self.opener = scmutil.opener(path, audit)
+ self._packing = False
+ self._dfp = None
+ self._ifp = None
+ self._index = {}
+
+ def _getcreatemode(self):
+ return self.opener.createmode
+
+ def _setcreatemode(self, m):
+ self.opener.createmode = m
+
+ createmode = property(_getcreatemode, _setcreatemode)
+
+ def readindex(self):
+ index = self._index = {}
+ data = self.opener.tryread('00pack.idx')
+ end = len(data)
+ if end < 6:
+ return
+ flags, = struct.unpack_from(self._hdrformat, data, 4)
+ assert data.startswith('HGPX') and flags == 0, repr(data[:6])
+ n = struct.calcsize(self._idxformat)
+ i = 6
+ while i < end:
+ hi, lo, l, lp = struct.unpack_from(self._idxformat, data, i)
+ i += n
+ j = i + lp
+ path = data[i:j]
+ start = hi << 32 | lo
+ if start:
+ index[path] = start, l
+ else:
+ del index[path]
+ i = j
+
+ def packstart(self):
+ if self._dfp:
+ self._dfp.close()
+ if self._ifp:
+ self._ifp.close()
+ self._dfp = self.opener('00pack.dat', 'w+b', atomictemp=True)
+ self._ifp = self.opener('00pack.idx', 'w+b', atomictemp=True)
+ self._dfp.write('HGPD')
+ self._ifp.write('HGPX')
+ flags = 0
+ self._ifp.write(struct.pack(self._hdrformat, flags))
+ self._packing = True
+
+ def packend(self):
+ self._packing = False
+ self._dfp.close()
+ self._ifp.close()
+ self._dfp = None
+ self._ifp = None
+
+ def packabort(self):
+ self._dfp.discard()
+ self._dfp = None
+ self._ifp.discard()
+ self._ifp = None
+ self._index = {}
+
+ def _writeindex(self, path, start, length):
+ self._ifp.write(struct.pack(self._idxformat, start >> 32,
+ start & 0xffffffff, length, len(path)))
+ self._ifp.write(path)
+
+ def _written(self, path, data):
+ l = sum(map(len, data))
+ if not l:
+ return
+ if l < self._maxpacksize:
+ start = self._dfp.tell()
+ for d in data:
+ self._dfp.write(d)
+ self._index[path] = start, l
+ self._writeindex(path, start, l)
+ else:
+ fp = self.opener(path, 'w')
+ for d in data:
+ fp.write(d)
+ fp.close()
+
+ packable = set(('.i', '.d'))
+
+ def __call__(self, path, mode='r', text=False, atomictemp=False):
+ if (self._packing and mode.startswith('w') and
+ path.startswith('data') and path[-2:] in self.packable):
+ return writefile(path, self._written)
+ ix = self._index.get(path)
+ if ix and mode.startswith('r'):
+ if not self._dfp:
+ self._dfp = self.opener('00pack.dat', 'r')
+ return readfile(self._dfp, path, ix[0], ix[1])
+ fp = self.opener(path, mode, text, atomictemp)
+ if ix and mode[0] in 'aw':
+ fp.write(self.read(path))
+ if self._ifp is None:
+ self._ifp = self.opener('00pack.idx', 'a+')
+ self._writeindex(path, 0, 0)
+ del self._index[path]
+ return fp
+
+cmdtable = {}
+command = cmdutil.command(cmdtable)
+
+ at command('repack',
+ [('', 'convert', None, _('convert an unpacked repo to packed'))],
+ _('hg repack'))
+def repack(ui, repo, **opts):
+ '''repack a packed repository, or convert an unpacked repository
+
+    Any revlog files that were copied into the pack file are deleted.'''
+
+ if 'packrepo' not in repo.requirements:
+ if opts['convert']:
+ repo._makepacked()
+ else:
+ raise util.Abort(_('this repository is not packed '
+ '(use --convert to convert)'))
+ repo.repack()
+
+def packlist(ui, repo):
+ notpacked = ' ' * 9 + '-'
+ for f, f2, start, size in repo.packiter(_('scanning')):
+ if start is None:
+ start = notpacked
+ else:
+ start = '%10d' % start
+ if len(f) > 60:
+ ui.write(f + '\n')
+ f = ''
+ ui.write('%s %s %8d\n' % (f.ljust(60), start, size))
+
+def packsummary(ui, repo):
+ bpacked = fpacked = 0
+ bunpacked = funpacked = 0
+ btoobig = ftoobig = 0
+ for f, f2, start, size in repo.packiter(_('scanning')):
+ if start is None:
+ if size < packopener._maxpacksize:
+ bunpacked += size
+ funpacked += 1
+ else:
+ btoobig += size
+ ftoobig += 1
+ else:
+ bpacked += size
+ fpacked += 1
+ ui.write(_('%s packed, saving %d files\n') %
+ (util.bytecount(bpacked), fpacked))
+ ui.write(_('%s not packed in %d files (could be packed)\n')
+ % (util.bytecount(bunpacked), funpacked))
+ ui.write(_('%s large revlogs in %d files (no need to pack)\n')
+ % (util.bytecount(btoobig), ftoobig))
+
+ at command('packinfo',
+ [('l', 'list', None,
+ _('list packed/unpacked files (useful for debugging)')),
+ ('s', 'summary', None, _('overview of what is and is not packed'))],
+ _('hg packinfo [OPTION]...'))
+def packinfo(ui, repo, **opts):
+ '''print pack-related information about a repository'''
+
+ if not opts['list'] and not opts['summary']:
+ opts['summary'] = 1
+
+ if opts['list']:
+ packlist(ui, repo)
+ if opts['summary']:
+ packsummary(ui, repo)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -819,6 +819,7 @@ class atomictempfile(object):
# delegated methods
self.write = self._fp.write
+ self.tell = self._fp.tell
self.fileno = self._fp.fileno
def close(self):
diff --git a/tests/test-packrepo.t b/tests/test-packrepo.t
new file mode 100644
--- /dev/null
+++ b/tests/test-packrepo.t
@@ -0,0 +1,192 @@
+create a normal source repo
+
+ $ hg init a
+ $ cd a
+ $ echo a>a
+ $ hg ci -q -A -m a
+ $ hg verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+ $ echo "[extensions]" >> $HGRCPATH
+ $ echo "packrepo=" >> $HGRCPATH
+
+try repacking
+
+ $ hg clone . ../repacked
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ cd ../repacked
+ $ cp .hg/requires .hg/oldrequires
+ $ hg packinfo
+ 0 bytes packed, saving 0 files
+ 67 bytes not packed in 1 files (could be packed)
+ 0 bytes large revlogs in 0 files (no need to pack)
+ $ hg repack
+ abort: this repository is not packed (use --convert to convert)
+ [255]
+ $ hg repack --convert
+ 67 bytes packed, 0 bytes untouched, 1 unlinked
+ $ diff -u .hg/oldrequires .hg/requires | grep '^[-+]'
+ --- .hg/oldrequires* (glob)
+ +++ .hg/requires* (glob)
+ +packrepo
+ $ hg packinfo
+ 67 bytes packed, saving 1 files
+ 0 bytes not packed in 0 files (could be packed)
+ 0 bytes large revlogs in 0 files (no need to pack)
+ $ hg verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+repeated repack should be idempotent
+
+ $ hg repack
+ 67 bytes packed, 0 bytes untouched, 0 unlinked
+ $ hg repack
+ 67 bytes packed, 0 bytes untouched, 0 unlinked
+
+ $ cd ../a
+ $ hg --config server.uncompressed=True serve -p $HGPORT -d --pid-file=../hg1.pid
+
+ $ hg clone --uncompressed http://localhost:$HGPORT/ ../b
+ streaming all changes
+ 3 files to transfer, 294 bytes of data
+ transferred * bytes in * seconds (*/sec) (glob)
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+ensure that dest is packed and sane
+
+ $ cd ../b
+ $ ls .hg/store/00pack.*
+ .hg/store/00pack.dat
+ .hg/store/00pack.idx
+ $ ls .hg/store/data/a.[id]
+ ls: .hg/store/data/a.[id]: No such file or directory
+ [1]
+ $ grep packrepo .hg/requires
+ packrepo
+ $ hg verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+get some info
+
+ $ hg packinfo
+ 67 bytes packed, saving 1 files
+ 0 bytes not packed in 0 files (could be packed)
+ 0 bytes large revlogs in 0 files (no need to pack)
+
+does a hardlinked clone work?
+
+ $ hg clone . ../hl
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ hg --cwd ../hl verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+try a regular commit
+
+ $ echo b >> b
+ $ hg ci -A -m b
+ adding b
+ $ hg packinfo -l -s
+ data/b.i - 67
+ data/a.i 4 67
+ 67 bytes packed, saving 1 files
+ 67 bytes not packed in 1 files (could be packed)
+ 0 bytes large revlogs in 0 files (no need to pack)
+
+does COW work?
+
+ $ echo a >> a
+ $ hg ci -m b
+ $ ls .hg/store/data/a.[id]
+ .hg/store/data/a.i
+
+get some post-COW info
+
+ $ hg packinfo
+ 0 bytes packed, saving 0 files
+ 203 bytes not packed in 2 files (could be packed)
+ 0 bytes large revlogs in 0 files (no need to pack)
+
+does a hardlinked clone work after COW?
+
+ $ hg clone . ../hl2
+ updating to branch default
+ 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ hg --cwd ../hl2 verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 2 files, 3 changesets, 3 total revisions
+
+disable fncache, then check functionality
+
+ $ hg --config format.usefncache=False clone --uncompressed http://localhost:$HGPORT/ ../c
+ streaming all changes
+ 3 files to transfer, 294 bytes of data
+ transferred * bytes in * seconds (*/sec) (glob)
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+no fncache: ensure that dest is packed and sane
+
+ $ cd ../c
+ $ ls .hg/store/00pack.*
+ .hg/store/00pack.dat
+ .hg/store/00pack.idx
+ $ ls .hg/store/data/a.[id]
+ ls: .hg/store/data/a.[id]: No such file or directory
+ [1]
+ $ sort .hg/requires
+ packrepo
+ revlogv1
+ store
+ $ hg verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+no fncache: try a clone
+
+ $ hg clone . ../nofncache
+ updating to branch default
+ 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ hg --cwd ../nofncache verify
+ checking changesets
+ checking manifests
+ crosschecking files in changesets and manifests
+ checking files
+ 1 files, 1 changesets, 1 total revisions
+
+no fncache: does COW work?
+
+ $ cd ../nofncache
+ $ echo a >> a
+ $ hg ci -m b
+ $ ls .hg/store/data/a.[id]
+ .hg/store/data/a.i
+
+all done
+
+ $ cd ..
+ $ cat hg1.pid >> $DAEMON_PIDS
More information about the Mercurial-devel
mailing list