[Updated] D10789: revlog: move censoring code in a dedicated module
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Mon Jun 7 09:25:33 UTC 2021
Closed by commit rHG33d626910374: revlog: move censoring code in a dedicated module (authored by marmoute).
This revision was automatically updated to reflect the committed changes.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D10789?vs=28470&id=28492
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D10789/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D10789
AFFECTED FILES
mercurial/revlog.py
mercurial/revlogutils/censor.py
CHANGE DETAILS
diff --git a/mercurial/revlog.py b/mercurial/revlogutils/censor.py
copy from mercurial/revlog.py
copy to mercurial/revlogutils/censor.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlogutils/censor.py
@@ -1,3535 +1,102 @@
-# revlog.py - storage back-end for mercurial
-# coding: utf8
+# censor code related to censoring revision
#
-# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
+# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
+# Copyright 2015 Google, Inc <martinvonz@google.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
-"""Storage back-end for Mercurial.
-
-This provides efficient delta storage with O(1) retrieve and append
-and O(changes) merge between branches.
-"""
-
-from __future__ import absolute_import
-
-import binascii
-import collections
-import contextlib
-import errno
-import io
-import os
-import struct
-import zlib
-
-# import stuff from node for others to import from revlog
-from .node import (
- bin,
- hex,
+from ..node import (
nullrev,
- sha1nodeconstants,
- short,
- wdirrev,
-)
-from .i18n import _
-from .pycompat import getattr
-from .revlogutils.constants import (
- ALL_KINDS,
- CHANGELOGV2,
- COMP_MODE_DEFAULT,
- COMP_MODE_INLINE,
- COMP_MODE_PLAIN,
- FEATURES_BY_VERSION,
- FLAG_GENERALDELTA,
- FLAG_INLINE_DATA,
- INDEX_HEADER,
- KIND_CHANGELOG,
- REVLOGV0,
- REVLOGV1,
- REVLOGV1_FLAGS,
- REVLOGV2,
- REVLOGV2_FLAGS,
- REVLOG_DEFAULT_FLAGS,
- REVLOG_DEFAULT_FORMAT,
- REVLOG_DEFAULT_VERSION,
- SUPPORTED_FLAGS,
-)
-from .revlogutils.flagutil import (
- REVIDX_DEFAULT_FLAGS,
- REVIDX_ELLIPSIS,
- REVIDX_EXTSTORED,
- REVIDX_FLAGS_ORDER,
- REVIDX_HASCOPIESINFO,
- REVIDX_ISCENSORED,
- REVIDX_RAWTEXT_CHANGING_FLAGS,
-)
-from .thirdparty import attr
-from . import (
- ancestor,
- dagop,
- error,
- mdiff,
- policy,
- pycompat,
- templatefilters,
- util,
-)
-from .interfaces import (
- repository,
- util as interfaceutil,
-)
-from .revlogutils import (
- deltas as deltautil,
- docket as docketutil,
- flagutil,
- nodemap as nodemaputil,
- revlogv0,
- sidedata as sidedatautil,
-)
-from .utils import (
- storageutil,
- stringutil,
-)
-
-# blanked usage of all the name to prevent pyflakes constraints
-# We need these name available in the module for extensions.
-
-REVLOGV0
-REVLOGV1
-REVLOGV2
-FLAG_INLINE_DATA
-FLAG_GENERALDELTA
-REVLOG_DEFAULT_FLAGS
-REVLOG_DEFAULT_FORMAT
-REVLOG_DEFAULT_VERSION
-REVLOGV1_FLAGS
-REVLOGV2_FLAGS
-REVIDX_ISCENSORED
-REVIDX_ELLIPSIS
-REVIDX_HASCOPIESINFO
-REVIDX_EXTSTORED
-REVIDX_DEFAULT_FLAGS
-REVIDX_FLAGS_ORDER
-REVIDX_RAWTEXT_CHANGING_FLAGS
-
-parsers = policy.importmod('parsers')
-rustancestor = policy.importrust('ancestor')
-rustdagop = policy.importrust('dagop')
-rustrevlog = policy.importrust('revlog')
-
-# Aliased for performance.
-_zlibdecompress = zlib.decompress
-
-# max size of revlog with inline data
-_maxinline = 131072
-_chunksize = 1048576
-
-# Flag processors for REVIDX_ELLIPSIS.
-def ellipsisreadprocessor(rl, text):
- return text, False
-
-
-def ellipsiswriteprocessor(rl, text):
- return text, False
-
-
-def ellipsisrawprocessor(rl, text):
- return False
-
-
-ellipsisprocessor = (
- ellipsisreadprocessor,
- ellipsiswriteprocessor,
- ellipsisrawprocessor,
)
-
-
-def offset_type(offset, type):
- if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
- raise ValueError(b'unknown revlog index flags')
- return int(int(offset) << 16 | type)
-
-
-def _verify_revision(rl, skipflags, state, node):
- """Verify the integrity of the given revlog ``node`` while providing a hook
- point for extensions to influence the operation."""
- if skipflags:
- state[b'skipread'].add(node)
- else:
- # Side-effect: read content and verify hash.
- rl.revision(node)
-
-
-# True if a fast implementation for persistent-nodemap is available
-#
-# We also consider we have a "fast" implementation in "pure" python because
-# people using pure don't really have performance consideration (and a
-# wheelbarrow of other slowness source)
-HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
- parsers, 'BaseIndexObject'
+from ..i18n import _
+from .. import (
+ error,
)
-
-
-@attr.s(slots=True, frozen=True)
-class _revisioninfo(object):
- """Information about a revision that allows building its fulltext
- node: expected hash of the revision
- p1, p2: parent revs of the revision
- btext: built text cache consisting of a one-element list
- cachedelta: (baserev, uncompressed_delta) or None
- flags: flags associated to the revision storage
-
- One of btext[0] or cachedelta must be set.
- """
-
- node = attr.ib()
- p1 = attr.ib()
- p2 = attr.ib()
- btext = attr.ib()
- textlen = attr.ib()
- cachedelta = attr.ib()
- flags = attr.ib()
-
-
-@interfaceutil.implementer(repository.irevisiondelta)
-@attr.s(slots=True)
-class revlogrevisiondelta(object):
- node = attr.ib()
- p1node = attr.ib()
- p2node = attr.ib()
- basenode = attr.ib()
- flags = attr.ib()
- baserevisionsize = attr.ib()
- revision = attr.ib()
- delta = attr.ib()
- sidedata = attr.ib()
- protocol_flags = attr.ib()
- linknode = attr.ib(default=None)
-
-
-@interfaceutil.implementer(repository.iverifyproblem)
-@attr.s(frozen=True)
-class revlogproblem(object):
- warning = attr.ib(default=None)
- error = attr.ib(default=None)
- node = attr.ib(default=None)
-
-
-def parse_index_v1(data, inline):
- # call the C implementation to parse the index data
- index, cache = parsers.parse_index2(data, inline)
- return index, cache
-
-
-def parse_index_v2(data, inline):
- # call the C implementation to parse the index data
- index, cache = parsers.parse_index2(data, inline, revlogv2=True)
- return index, cache
-
-
-def parse_index_cl_v2(data, inline):
- # call the C implementation to parse the index data
- assert not inline
- from .pure.parsers import parse_index_cl_v2
-
- index, cache = parse_index_cl_v2(data)
- return index, cache
-
-
-if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
-
- def parse_index_v1_nodemap(data, inline):
- index, cache = parsers.parse_index_devel_nodemap(data, inline)
- return index, cache
-
-
-else:
- parse_index_v1_nodemap = None
-
-
-def parse_index_v1_mixed(data, inline):
- index, cache = parse_index_v1(data, inline)
- return rustrevlog.MixedIndex(index), cache
-
-
-# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
-# signed integer)
-_maxentrysize = 0x7FFFFFFF
-
-PARTIAL_READ_MSG = _(
- b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
+from ..utils import (
+ storageutil,
)
-
-FILE_TOO_SHORT_MSG = _(
- b'cannot read from revlog %s;'
- b' expected %d bytes from offset %d, data size is %d'
-)
-
-
-class revlog(object):
- """
- the underlying revision storage object
-
- A revlog consists of two parts, an index and the revision data.
-
- The index is a file with a fixed record size containing
- information on each revision, including its nodeid (hash), the
- nodeids of its parents, the position and offset of its data within
- the data file, and the revision it's based on. Finally, each entry
- contains a linkrev entry that can serve as a pointer to external
- data.
-
- The revision data itself is a linear collection of data chunks.
- Each chunk represents a revision and is usually represented as a
- delta against the previous chunk. To bound lookup time, runs of
- deltas are limited to about 2 times the length of the original
- version data. This makes retrieval of a version proportional to
- its size, or O(1) relative to the number of revisions.
-
- Both pieces of the revlog are written to in an append-only
- fashion, which means we never need to rewrite a file to insert or
- remove data, and can use some simple techniques to avoid the need
- for locking while reading.
-
- If checkambig, indexfile is opened with checkambig=True at
- writing, to avoid file stat ambiguity.
-
- If mmaplargeindex is True, and an mmapindexthreshold is set, the
- index will be mmapped rather than read if it is larger than the
- configured threshold.
-
- If censorable is True, the revlog can have censored revisions.
-
- If `upperboundcomp` is not None, this is the expected maximal gain from
- compression for the data content.
-
- `concurrencychecker` is an optional function that receives 3 arguments: a
- file handle, a filename, and an expected position. It should check whether
- the current position in the file handle is valid, and log/warn/fail (by
- raising).
+from . import constants
- Internal details
- ----------------
-
- A large part of the revlog logic deals with revisions' "index entries", tuple
- objects that contains the same "items" whatever the revlog version.
- Different versions will have different ways of storing these items (sometimes
- not having them at all), but the tuple will always be the same. New fields
- are usually added at the end to avoid breaking existing code that relies
- on the existing order. The field are defined as follows:
-
- [0] offset:
- The byte index of the start of revision data chunk.
- That value is shifted up by 16 bits. use "offset = field >> 16" to
- retrieve it.
-
- flags:
- A flag field that carries special information or changes the behavior
- of the revision. (see `REVIDX_*` constants for details)
- The flag field only occupies the first 16 bits of this field,
- use "flags = field & 0xFFFF" to retrieve the value.
-
- [1] compressed length:
- The size, in bytes, of the chunk on disk
-
- [2] uncompressed length:
- The size, in bytes, of the full revision once reconstructed.
-
- [3] base rev:
- Either the base of the revision delta chain (without general
- delta), or the base of the delta (stored in the data chunk)
- with general delta.
-
- [4] link rev:
- Changelog revision number of the changeset introducing this
- revision.
-
- [5] parent 1 rev:
- Revision number of the first parent
-
- [6] parent 2 rev:
- Revision number of the second parent
-
- [7] node id:
- The node id of the current revision
-
- [8] sidedata offset:
- The byte index of the start of the revision's side-data chunk.
-
- [9] sidedata chunk length:
- The size, in bytes, of the revision's side-data chunk.
-
- [10] data compression mode:
- two bits that detail the way the data chunk is compressed on disk.
- (see "COMP_MODE_*" constants for details). For revlog version 0 and
- 1 this will always be COMP_MODE_INLINE.
-
- [11] side-data compression mode:
- two bits that detail the way the sidedata chunk is compressed on disk.
- (see "COMP_MODE_*" constants for details)
- """
-
- _flagserrorclass = error.RevlogError
-
- def __init__(
- self,
- opener,
- target,
- radix,
- postfix=None, # only exist for `tmpcensored` now
- checkambig=False,
- mmaplargeindex=False,
- censorable=False,
- upperboundcomp=None,
- persistentnodemap=False,
- concurrencychecker=None,
- trypending=False,
- ):
- """
- create a revlog object
-
- opener is a function that abstracts the file opening operation
- and can be used to implement COW semantics or the like.
-
- `target`: a (KIND, ID) tuple that identify the content stored in
- this revlog. It help the rest of the code to understand what the revlog
- is about without having to resort to heuristic and index filename
- analysis. Note: that this must be reliably be set by normal code, but
- that test, debug, or performance measurement code might not set this to
- accurate value.
- """
- self.upperboundcomp = upperboundcomp
-
- self.radix = radix
-
- self._docket_file = None
- self._indexfile = None
- self._datafile = None
- self._sidedatafile = None
- self._nodemap_file = None
- self.postfix = postfix
- self._trypending = trypending
- self.opener = opener
- if persistentnodemap:
- self._nodemap_file = nodemaputil.get_nodemap_file(self)
+def v1_censor(rl, tr, censornode, tombstone=b''):
+ """censors a revision in a "version 1" revlog"""
+ assert rl._format_version == constants.REVLOGV1, rl._format_version
- assert target[0] in ALL_KINDS
- assert len(target) == 2
- self.target = target
- # When True, indexfile is opened with checkambig=True at writing, to
- # avoid file stat ambiguity.
- self._checkambig = checkambig
- self._mmaplargeindex = mmaplargeindex
- self._censorable = censorable
- # 3-tuple of (node, rev, text) for a raw revision.
- self._revisioncache = None
- # Maps rev to chain base rev.
- self._chainbasecache = util.lrucachedict(100)
- # 2-tuple of (offset, data) of raw data from the revlog at an offset.
- self._chunkcache = (0, b'')
- # How much data to read and cache into the raw revlog data cache.
- self._chunkcachesize = 65536
- self._maxchainlen = None
- self._deltabothparents = True
- self.index = None
- self._docket = None
- self._nodemap_docket = None
- # Mapping of partial identifiers to full nodes.
- self._pcache = {}
- # Mapping of revision integer to full node.
- self._compengine = b'zlib'
- self._compengineopts = {}
- self._maxdeltachainspan = -1
- self._withsparseread = False
- self._sparserevlog = False
- self.hassidedata = False
- self._srdensitythreshold = 0.50
- self._srmingapsize = 262144
-
- # Make copy of flag processors so each revlog instance can support
- # custom flags.
- self._flagprocessors = dict(flagutil.flagprocessors)
-
- # 3-tuple of file handles being used for active writing.
- self._writinghandles = None
- # prevent nesting of addgroup
- self._adding_group = None
-
- self._loadindex()
-
- self._concurrencychecker = concurrencychecker
-
- def _init_opts(self):
- """process options (from above/config) to setup associated default revlog mode
-
- These values might be affected when actually reading on disk information.
-
- The relevant values are returned for use in _loadindex().
-
- * newversionflags:
- version header to use if we need to create a new revlog
-
- * mmapindexthreshold:
- minimal index size for start to use mmap
-
- * force_nodemap:
- force the usage of a "development" version of the nodemap code
- """
- mmapindexthreshold = None
- opts = self.opener.options
-
- if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
- new_header = CHANGELOGV2
- elif b'revlogv2' in opts:
- new_header = REVLOGV2
- elif b'revlogv1' in opts:
- new_header = REVLOGV1 | FLAG_INLINE_DATA
- if b'generaldelta' in opts:
- new_header |= FLAG_GENERALDELTA
- elif b'revlogv0' in self.opener.options:
- new_header = REVLOGV0
- else:
- new_header = REVLOG_DEFAULT_VERSION
-
- if b'chunkcachesize' in opts:
- self._chunkcachesize = opts[b'chunkcachesize']
- if b'maxchainlen' in opts:
- self._maxchainlen = opts[b'maxchainlen']
- if b'deltabothparents' in opts:
- self._deltabothparents = opts[b'deltabothparents']
- self._lazydelta = bool(opts.get(b'lazydelta', True))
- self._lazydeltabase = False
- if self._lazydelta:
- self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
- if b'compengine' in opts:
- self._compengine = opts[b'compengine']
- if b'zlib.level' in opts:
- self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
- if b'zstd.level' in opts:
- self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
- if b'maxdeltachainspan' in opts:
- self._maxdeltachainspan = opts[b'maxdeltachainspan']
- if self._mmaplargeindex and b'mmapindexthreshold' in opts:
- mmapindexthreshold = opts[b'mmapindexthreshold']
- self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
- withsparseread = bool(opts.get(b'with-sparse-read', False))
- # sparse-revlog forces sparse-read
- self._withsparseread = self._sparserevlog or withsparseread
- if b'sparse-read-density-threshold' in opts:
- self._srdensitythreshold = opts[b'sparse-read-density-threshold']
- if b'sparse-read-min-gap-size' in opts:
- self._srmingapsize = opts[b'sparse-read-min-gap-size']
- if opts.get(b'enableellipsis'):
- self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
-
- # revlog v0 doesn't have flag processors
- for flag, processor in pycompat.iteritems(
- opts.get(b'flagprocessors', {})
- ):
- flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
-
- if self._chunkcachesize <= 0:
- raise error.RevlogError(
- _(b'revlog chunk cache size %r is not greater than 0')
- % self._chunkcachesize
- )
- elif self._chunkcachesize & (self._chunkcachesize - 1):
- raise error.RevlogError(
- _(b'revlog chunk cache size %r is not a power of 2')
- % self._chunkcachesize
- )
- force_nodemap = opts.get(b'devel-force-nodemap', False)
- return new_header, mmapindexthreshold, force_nodemap
+ # avoid cycle
+ from .. import revlog
- def _get_data(self, filepath, mmap_threshold, size=None):
- """return a file content with or without mmap
-
- If the file is missing return the empty string"""
- try:
- with self.opener(filepath) as fp:
- if mmap_threshold is not None:
- file_size = self.opener.fstat(fp).st_size
- if file_size >= mmap_threshold:
- if size is not None:
- # avoid potentiel mmap crash
- size = min(file_size, size)
- # TODO: should .close() to release resources without
- # relying on Python GC
- if size is None:
- return util.buffer(util.mmapread(fp))
- else:
- return util.buffer(util.mmapread(fp, size))
- if size is None:
- return fp.read()
- else:
- return fp.read(size)
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- return b''
-
- def _loadindex(self):
-
- new_header, mmapindexthreshold, force_nodemap = self._init_opts()
-
- if self.postfix is not None:
- entry_point = b'%s.i.%s' % (self.radix, self.postfix)
- elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
- entry_point = b'%s.i.a' % self.radix
- else:
- entry_point = b'%s.i' % self.radix
-
- entry_data = b''
- self._initempty = True
- entry_data = self._get_data(entry_point, mmapindexthreshold)
- if len(entry_data) > 0:
- header = INDEX_HEADER.unpack(entry_data[:4])[0]
- self._initempty = False
- else:
- header = new_header
-
- self._format_flags = header & ~0xFFFF
- self._format_version = header & 0xFFFF
-
- supported_flags = SUPPORTED_FLAGS.get(self._format_version)
- if supported_flags is None:
- msg = _(b'unknown version (%d) in revlog %s')
- msg %= (self._format_version, self.display_id)
- raise error.RevlogError(msg)
- elif self._format_flags & ~supported_flags:
- msg = _(b'unknown flags (%#04x) in version %d revlog %s')
- display_flag = self._format_flags >> 16
- msg %= (display_flag, self._format_version, self.display_id)
- raise error.RevlogError(msg)
-
- features = FEATURES_BY_VERSION[self._format_version]
- self._inline = features[b'inline'](self._format_flags)
- self._generaldelta = features[b'generaldelta'](self._format_flags)
- self.hassidedata = features[b'sidedata']
-
- if not features[b'docket']:
- self._indexfile = entry_point
- index_data = entry_data
- else:
- self._docket_file = entry_point
- if self._initempty:
- self._docket = docketutil.default_docket(self, header)
- else:
- self._docket = docketutil.parse_docket(
- self, entry_data, use_pending=self._trypending
- )
- self._indexfile = self._docket.index_filepath()
- index_data = b''
- index_size = self._docket.index_end
- if index_size > 0:
- index_data = self._get_data(
- self._indexfile, mmapindexthreshold, size=index_size
- )
- if len(index_data) < index_size:
- msg = _(b'too few index data for %s: got %d, expected %d')
- msg %= (self.display_id, len(index_data), index_size)
- raise error.RevlogError(msg)
-
- self._inline = False
- # generaldelta implied by version 2 revlogs.
- self._generaldelta = True
- # the logic for persistent nodemap will be dealt with within the
- # main docket, so disable it for now.
- self._nodemap_file = None
-
- if self._docket is not None:
- self._datafile = self._docket.data_filepath()
- self._sidedatafile = self._docket.sidedata_filepath()
- elif self.postfix is None:
- self._datafile = b'%s.d' % self.radix
- else:
- self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
-
- self.nodeconstants = sha1nodeconstants
- self.nullid = self.nodeconstants.nullid
-
- # sparse-revlog can't be on without general-delta (issue6056)
- if not self._generaldelta:
- self._sparserevlog = False
-
- self._storedeltachains = True
+ censorrev = rl.rev(censornode)
+ tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
- devel_nodemap = (
- self._nodemap_file
- and force_nodemap
- and parse_index_v1_nodemap is not None
- )
-
- use_rust_index = False
- if rustrevlog is not None:
- if self._nodemap_file is not None:
- use_rust_index = True
- else:
- use_rust_index = self.opener.options.get(b'rust.index')
-
- self._parse_index = parse_index_v1
- if self._format_version == REVLOGV0:
- self._parse_index = revlogv0.parse_index_v0
- elif self._format_version == REVLOGV2:
- self._parse_index = parse_index_v2
- elif self._format_version == CHANGELOGV2:
- self._parse_index = parse_index_cl_v2
- elif devel_nodemap:
- self._parse_index = parse_index_v1_nodemap
- elif use_rust_index:
- self._parse_index = parse_index_v1_mixed
- try:
- d = self._parse_index(index_data, self._inline)
- index, _chunkcache = d
- use_nodemap = (
- not self._inline
- and self._nodemap_file is not None
- and util.safehasattr(index, 'update_nodemap_data')
- )
- if use_nodemap:
- nodemap_data = nodemaputil.persisted_data(self)
- if nodemap_data is not None:
- docket = nodemap_data[0]
- if (
- len(d[0]) > docket.tip_rev
- and d[0][docket.tip_rev][7] == docket.tip_node
- ):
- # no changelog tampering
- self._nodemap_docket = docket
- index.update_nodemap_data(*nodemap_data)
- except (ValueError, IndexError):
- raise error.RevlogError(
- _(b"index %s is corrupted") % self.display_id
- )
- self.index, self._chunkcache = d
- if not self._chunkcache:
- self._chunkclear()
- # revnum -> (chain-length, sum-delta-length)
- self._chaininfocache = util.lrucachedict(500)
- # revlog header -> revlog compressor
- self._decompressors = {}
-
- @util.propertycache
- def revlog_kind(self):
- return self.target[0]
-
- @util.propertycache
- def display_id(self):
- """The public facing "ID" of the revlog that we use in message"""
- # Maybe we should build a user facing representation of
- # revlog.target instead of using `self.radix`
- return self.radix
-
- def _get_decompressor(self, t):
- try:
- compressor = self._decompressors[t]
- except KeyError:
- try:
- engine = util.compengines.forrevlogheader(t)
- compressor = engine.revlogcompressor(self._compengineopts)
- self._decompressors[t] = compressor
- except KeyError:
- raise error.RevlogError(
- _(b'unknown compression type %s') % binascii.hexlify(t)
- )
- return compressor
-
- @util.propertycache
- def _compressor(self):
- engine = util.compengines[self._compengine]
- return engine.revlogcompressor(self._compengineopts)
-
- @util.propertycache
- def _decompressor(self):
- """the default decompressor"""
- if self._docket is None:
- return None
- t = self._docket.default_compression_header
- c = self._get_decompressor(t)
- return c.decompress
-
- def _indexfp(self):
- """file object for the revlog's index file"""
- return self.opener(self._indexfile, mode=b"r")
-
- def __index_write_fp(self):
- # You should not use this directly and use `_writing` instead
- try:
- f = self.opener(
- self._indexfile, mode=b"r+", checkambig=self._checkambig
- )
- if self._docket is None:
- f.seek(0, os.SEEK_END)
- else:
- f.seek(self._docket.index_end, os.SEEK_SET)
- return f
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- return self.opener(
- self._indexfile, mode=b"w+", checkambig=self._checkambig
- )
-
- def __index_new_fp(self):
- # You should not use this unless you are upgrading from inline revlog
- return self.opener(
- self._indexfile,
- mode=b"w",
- checkambig=self._checkambig,
- atomictemp=True,
+ if len(tombstone) > rl.rawsize(censorrev):
+ raise error.Abort(
+ _(b'censor tombstone must be no longer than censored data')
)
- def _datafp(self, mode=b'r'):
- """file object for the revlog's data file"""
- return self.opener(self._datafile, mode=mode)
-
- @contextlib.contextmanager
- def _datareadfp(self, existingfp=None):
- """file object suitable to read data"""
- # Use explicit file handle, if given.
- if existingfp is not None:
- yield existingfp
-
- # Use a file handle being actively used for writes, if available.
- # There is some danger to doing this because reads will seek the
- # file. However, _writeentry() performs a SEEK_END before all writes,
- # so we should be safe.
- elif self._writinghandles:
- if self._inline:
- yield self._writinghandles[0]
- else:
- yield self._writinghandles[1]
-
- # Otherwise open a new file handle.
- else:
- if self._inline:
- func = self._indexfp
- else:
- func = self._datafp
- with func() as fp:
- yield fp
-
- @contextlib.contextmanager
- def _sidedatareadfp(self):
- """file object suitable to read sidedata"""
- if self._writinghandles:
- yield self._writinghandles[2]
- else:
- with self.opener(self._sidedatafile) as fp:
- yield fp
-
- def tiprev(self):
- return len(self.index) - 1
-
- def tip(self):
- return self.node(self.tiprev())
-
- def __contains__(self, rev):
- return 0 <= rev < len(self)
-
- def __len__(self):
- return len(self.index)
-
- def __iter__(self):
- return iter(pycompat.xrange(len(self)))
-
- def revs(self, start=0, stop=None):
- """iterate over all rev in this revlog (from start to stop)"""
- return storageutil.iterrevs(len(self), start=start, stop=stop)
-
- @property
- def nodemap(self):
- msg = (
- b"revlog.nodemap is deprecated, "
- b"use revlog.index.[has_node|rev|get_rev]"
- )
- util.nouideprecwarn(msg, b'5.3', stacklevel=2)
- return self.index.nodemap
-
- @property
- def _nodecache(self):
- msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
- util.nouideprecwarn(msg, b'5.3', stacklevel=2)
- return self.index.nodemap
-
- def hasnode(self, node):
- try:
- self.rev(node)
- return True
- except KeyError:
- return False
-
- def candelta(self, baserev, rev):
- """whether two revisions (baserev, rev) can be delta-ed or not"""
- # Disable delta if either rev requires a content-changing flag
- # processor (ex. LFS). This is because such flag processor can alter
- # the rawtext content that the delta will be based on, and two clients
- # could have a same revlog node with different flags (i.e. different
- # rawtext contents) and the delta could be incompatible.
- if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
- self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
- ):
- return False
- return True
-
- def update_caches(self, transaction):
- if self._nodemap_file is not None:
- if transaction is None:
- nodemaputil.update_persistent_nodemap(self)
- else:
- nodemaputil.setup_persistent_nodemap(transaction, self)
-
- def clearcaches(self):
- self._revisioncache = None
- self._chainbasecache.clear()
- self._chunkcache = (0, b'')
- self._pcache = {}
- self._nodemap_docket = None
- self.index.clearcaches()
- # The python code is the one responsible for validating the docket, we
- # end up having to refresh it here.
- use_nodemap = (
- not self._inline
- and self._nodemap_file is not None
- and util.safehasattr(self.index, 'update_nodemap_data')
- )
- if use_nodemap:
- nodemap_data = nodemaputil.persisted_data(self)
- if nodemap_data is not None:
- self._nodemap_docket = nodemap_data[0]
- self.index.update_nodemap_data(*nodemap_data)
-
- def rev(self, node):
- try:
- return self.index.rev(node)
- except TypeError:
- raise
- except error.RevlogError:
- # parsers.c radix tree lookup failed
- if (
- node == self.nodeconstants.wdirid
- or node in self.nodeconstants.wdirfilenodeids
- ):
- raise error.WdirUnsupported
- raise error.LookupError(node, self.display_id, _(b'no node'))
-
- # Accessors for index entries.
-
- # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
- # are flags.
- def start(self, rev):
- return int(self.index[rev][0] >> 16)
-
- def sidedata_cut_off(self, rev):
- sd_cut_off = self.index[rev][8]
- if sd_cut_off != 0:
- return sd_cut_off
- # This is some annoying dance, because entries without sidedata
- # currently use 0 as their ofsset. (instead of previous-offset +
- # previous-size)
- #
- # We should reconsider this sidedata → 0 sidata_offset policy.
- # In the meantime, we need this.
- while 0 <= rev:
- e = self.index[rev]
- if e[9] != 0:
- return e[8] + e[9]
- rev -= 1
- return 0
-
- def flags(self, rev):
- return self.index[rev][0] & 0xFFFF
-
- def length(self, rev):
- return self.index[rev][1]
-
- def sidedata_length(self, rev):
- if not self.hassidedata:
- return 0
- return self.index[rev][9]
-
- def rawsize(self, rev):
- """return the length of the uncompressed text for a given revision"""
- l = self.index[rev][2]
- if l >= 0:
- return l
-
- t = self.rawdata(rev)
- return len(t)
-
- def size(self, rev):
- """length of non-raw text (processed by a "read" flag processor)"""
- # fast path: if no "read" flag processor could change the content,
- # size is rawsize. note: ELLIPSIS is known to not change the content.
- flags = self.flags(rev)
- if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
- return self.rawsize(rev)
-
- return len(self.revision(rev, raw=False))
-
- def chainbase(self, rev):
- base = self._chainbasecache.get(rev)
- if base is not None:
- return base
-
- index = self.index
- iterrev = rev
- base = index[iterrev][3]
- while base != iterrev:
- iterrev = base
- base = index[iterrev][3]
-
- self._chainbasecache[rev] = base
- return base
-
- def linkrev(self, rev):
- return self.index[rev][4]
-
- def parentrevs(self, rev):
- try:
- entry = self.index[rev]
- except IndexError:
- if rev == wdirrev:
- raise error.WdirUnsupported
- raise
- if entry[5] == nullrev:
- return entry[6], entry[5]
- else:
- return entry[5], entry[6]
-
- # fast parentrevs(rev) where rev isn't filtered
- _uncheckedparentrevs = parentrevs
-
- def node(self, rev):
- try:
- return self.index[rev][7]
- except IndexError:
- if rev == wdirrev:
- raise error.WdirUnsupported
- raise
-
- # Derived from index values.
-
- def end(self, rev):
- return self.start(rev) + self.length(rev)
-
- def parents(self, node):
- i = self.index
- d = i[self.rev(node)]
- # inline node() to avoid function call overhead
- if d[5] == self.nullid:
- return i[d[6]][7], i[d[5]][7]
- else:
- return i[d[5]][7], i[d[6]][7]
-
- def chainlen(self, rev):
- return self._chaininfo(rev)[0]
-
- def _chaininfo(self, rev):
- chaininfocache = self._chaininfocache
- if rev in chaininfocache:
- return chaininfocache[rev]
- index = self.index
- generaldelta = self._generaldelta
- iterrev = rev
- e = index[iterrev]
- clen = 0
- compresseddeltalen = 0
- while iterrev != e[3]:
- clen += 1
- compresseddeltalen += e[1]
- if generaldelta:
- iterrev = e[3]
- else:
- iterrev -= 1
- if iterrev in chaininfocache:
- t = chaininfocache[iterrev]
- clen += t[0]
- compresseddeltalen += t[1]
- break
- e = index[iterrev]
- else:
- # Add text length of base since decompressing that also takes
- # work. For cache hits the length is already included.
- compresseddeltalen += e[1]
- r = (clen, compresseddeltalen)
- chaininfocache[rev] = r
- return r
-
- def _deltachain(self, rev, stoprev=None):
- """Obtain the delta chain for a revision.
-
- ``stoprev`` specifies a revision to stop at. If not specified, we
- stop at the base of the chain.
-
- Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
- revs in ascending order and ``stopped`` is a bool indicating whether
- ``stoprev`` was hit.
- """
- # Try C implementation.
- try:
- return self.index.deltachain(rev, stoprev, self._generaldelta)
- except AttributeError:
- pass
-
- chain = []
-
- # Alias to prevent attribute lookup in tight loop.
- index = self.index
- generaldelta = self._generaldelta
-
- iterrev = rev
- e = index[iterrev]
- while iterrev != e[3] and iterrev != stoprev:
- chain.append(iterrev)
- if generaldelta:
- iterrev = e[3]
- else:
- iterrev -= 1
- e = index[iterrev]
-
- if iterrev == stoprev:
- stopped = True
- else:
- chain.append(iterrev)
- stopped = False
-
- chain.reverse()
- return chain, stopped
-
- def ancestors(self, revs, stoprev=0, inclusive=False):
- """Generate the ancestors of 'revs' in reverse revision order.
- Does not generate revs lower than stoprev.
-
- See the documentation for ancestor.lazyancestors for more details."""
-
- # first, make sure start revisions aren't filtered
- revs = list(revs)
- checkrev = self.node
- for r in revs:
- checkrev(r)
- # and we're sure ancestors aren't filtered as well
-
- if rustancestor is not None and self.index.rust_ext_compat:
- lazyancestors = rustancestor.LazyAncestors
- arg = self.index
- else:
- lazyancestors = ancestor.lazyancestors
- arg = self._uncheckedparentrevs
- return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
-
- def descendants(self, revs):
- return dagop.descendantrevs(revs, self.revs, self.parentrevs)
-
- def findcommonmissing(self, common=None, heads=None):
- """Return a tuple of the ancestors of common and the ancestors of heads
- that are not ancestors of common. In revset terminology, we return the
- tuple:
-
- ::common, (::heads) - (::common)
-
- The list is sorted by revision number, meaning it is
- topologically sorted.
-
- 'heads' and 'common' are both lists of node IDs. If heads is
- not supplied, uses all of the revlog's heads. If common is not
- supplied, uses nullid."""
- if common is None:
- common = [self.nullid]
- if heads is None:
- heads = self.heads()
-
- common = [self.rev(n) for n in common]
- heads = [self.rev(n) for n in heads]
-
- # we want the ancestors, but inclusive
- class lazyset(object):
- def __init__(self, lazyvalues):
- self.addedvalues = set()
- self.lazyvalues = lazyvalues
-
- def __contains__(self, value):
- return value in self.addedvalues or value in self.lazyvalues
-
- def __iter__(self):
- added = self.addedvalues
- for r in added:
- yield r
- for r in self.lazyvalues:
- if not r in added:
- yield r
-
- def add(self, value):
- self.addedvalues.add(value)
-
- def update(self, values):
- self.addedvalues.update(values)
-
- has = lazyset(self.ancestors(common))
- has.add(nullrev)
- has.update(common)
-
- # take all ancestors from heads that aren't in has
- missing = set()
- visit = collections.deque(r for r in heads if r not in has)
- while visit:
- r = visit.popleft()
- if r in missing:
- continue
- else:
- missing.add(r)
- for p in self.parentrevs(r):
- if p not in has:
- visit.append(p)
- missing = list(missing)
- missing.sort()
- return has, [self.node(miss) for miss in missing]
-
- def incrementalmissingrevs(self, common=None):
- """Return an object that can be used to incrementally compute the
- revision numbers of the ancestors of arbitrary sets that are not
- ancestors of common. This is an ancestor.incrementalmissingancestors
- object.
+ # Rewriting the revlog in place is hard. Our strategy for censoring is
+ # to create a new revlog, copy all revisions to it, then replace the
+ # revlogs on transaction close.
+ #
+ # This is a bit dangerous. We could easily have a mismatch of state.
+ newrl = revlog.revlog(
+ rl.opener,
+ target=rl.target,
+ radix=rl.radix,
+ postfix=b'tmpcensored',
+ censorable=True,
+ )
+ newrl._format_version = rl._format_version
+ newrl._format_flags = rl._format_flags
+ newrl._generaldelta = rl._generaldelta
+ newrl._parse_index = rl._parse_index
- 'common' is a list of revision numbers. If common is not supplied, uses
- nullrev.
- """
- if common is None:
- common = [nullrev]
-
- if rustancestor is not None and self.index.rust_ext_compat:
- return rustancestor.MissingAncestors(self.index, common)
- return ancestor.incrementalmissingancestors(self.parentrevs, common)
-
- def findmissingrevs(self, common=None, heads=None):
- """Return the revision numbers of the ancestors of heads that
- are not ancestors of common.
-
- More specifically, return a list of revision numbers corresponding to
- nodes N such that every N satisfies the following constraints:
-
- 1. N is an ancestor of some node in 'heads'
- 2. N is not an ancestor of any node in 'common'
-
- The list is sorted by revision number, meaning it is
- topologically sorted.
-
- 'heads' and 'common' are both lists of revision numbers. If heads is
- not supplied, uses all of the revlog's heads. If common is not
- supplied, uses nullid."""
- if common is None:
- common = [nullrev]
- if heads is None:
- heads = self.headrevs()
-
- inc = self.incrementalmissingrevs(common=common)
- return inc.missingancestors(heads)
-
- def findmissing(self, common=None, heads=None):
- """Return the ancestors of heads that are not ancestors of common.
-
- More specifically, return a list of nodes N such that every N
- satisfies the following constraints:
-
- 1. N is an ancestor of some node in 'heads'
- 2. N is not an ancestor of any node in 'common'
-
- The list is sorted by revision number, meaning it is
- topologically sorted.
-
- 'heads' and 'common' are both lists of node IDs. If heads is
- not supplied, uses all of the revlog's heads. If common is not
- supplied, uses nullid."""
- if common is None:
- common = [self.nullid]
- if heads is None:
- heads = self.heads()
-
- common = [self.rev(n) for n in common]
- heads = [self.rev(n) for n in heads]
-
- inc = self.incrementalmissingrevs(common=common)
- return [self.node(r) for r in inc.missingancestors(heads)]
-
- def nodesbetween(self, roots=None, heads=None):
- """Return a topological path from 'roots' to 'heads'.
-
- Return a tuple (nodes, outroots, outheads) where 'nodes' is a
- topologically sorted list of all nodes N that satisfy both of
- these constraints:
-
- 1. N is a descendant of some node in 'roots'
- 2. N is an ancestor of some node in 'heads'
-
- Every node is considered to be both a descendant and an ancestor
- of itself, so every reachable node in 'roots' and 'heads' will be
- included in 'nodes'.
-
- 'outroots' is the list of reachable nodes in 'roots', i.e., the
- subset of 'roots' that is returned in 'nodes'. Likewise,
- 'outheads' is the subset of 'heads' that is also in 'nodes'.
-
- 'roots' and 'heads' are both lists of node IDs. If 'roots' is
- unspecified, uses nullid as the only root. If 'heads' is
- unspecified, uses list of all of the revlog's heads."""
- nonodes = ([], [], [])
- if roots is not None:
- roots = list(roots)
- if not roots:
- return nonodes
- lowestrev = min([self.rev(n) for n in roots])
- else:
- roots = [self.nullid] # Everybody's a descendant of nullid
- lowestrev = nullrev
- if (lowestrev == nullrev) and (heads is None):
- # We want _all_ the nodes!
- return (
- [self.node(r) for r in self],
- [self.nullid],
- list(self.heads()),
- )
- if heads is None:
- # All nodes are ancestors, so the latest ancestor is the last
- # node.
- highestrev = len(self) - 1
- # Set ancestors to None to signal that every node is an ancestor.
- ancestors = None
- # Set heads to an empty dictionary for later discovery of heads
- heads = {}
- else:
- heads = list(heads)
- if not heads:
- return nonodes
- ancestors = set()
- # Turn heads into a dictionary so we can remove 'fake' heads.
- # Also, later we will be using it to filter out the heads we can't
- # find from roots.
- heads = dict.fromkeys(heads, False)
- # Start at the top and keep marking parents until we're done.
- nodestotag = set(heads)
- # Remember where the top was so we can use it as a limit later.
- highestrev = max([self.rev(n) for n in nodestotag])
- while nodestotag:
- # grab a node to tag
- n = nodestotag.pop()
- # Never tag nullid
- if n == self.nullid:
- continue
- # A node's revision number represents its place in a
- # topologically sorted list of nodes.
- r = self.rev(n)
- if r >= lowestrev:
- if n not in ancestors:
- # If we are possibly a descendant of one of the roots
- # and we haven't already been marked as an ancestor
- ancestors.add(n) # Mark as ancestor
- # Add non-nullid parents to list of nodes to tag.
- nodestotag.update(
- [p for p in self.parents(n) if p != self.nullid]
- )
- elif n in heads: # We've seen it before, is it a fake head?
- # So it is, real heads should not be the ancestors of
- # any other heads.
- heads.pop(n)
- if not ancestors:
- return nonodes
- # Now that we have our set of ancestors, we want to remove any
- # roots that are not ancestors.
-
- # If one of the roots was nullid, everything is included anyway.
- if lowestrev > nullrev:
- # But, since we weren't, let's recompute the lowest rev to not
- # include roots that aren't ancestors.
+ for rev in rl.revs():
+ node = rl.node(rev)
+ p1, p2 = rl.parents(node)
- # Filter out roots that aren't ancestors of heads
- roots = [root for root in roots if root in ancestors]
- # Recompute the lowest revision
- if roots:
- lowestrev = min([self.rev(root) for root in roots])
- else:
- # No more roots? Return empty list
- return nonodes
- else:
- # We are descending from nullid, and don't need to care about
- # any other roots.
- lowestrev = nullrev
- roots = [self.nullid]
- # Transform our roots list into a set.
- descendants = set(roots)
- # Also, keep the original roots so we can filter out roots that aren't
- # 'real' roots (i.e. are descended from other roots).
- roots = descendants.copy()
- # Our topologically sorted list of output nodes.
- orderedout = []
- # Don't start at nullid since we don't want nullid in our output list,
- # and if nullid shows up in descendants, empty parents will look like
- # they're descendants.
- for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
- n = self.node(r)
- isdescendant = False
- if lowestrev == nullrev: # Everybody is a descendant of nullid
- isdescendant = True
- elif n in descendants:
- # n is already a descendant
- isdescendant = True
- # This check only needs to be done here because all the roots
- # will start being marked is descendants before the loop.
- if n in roots:
- # If n was a root, check if it's a 'real' root.
- p = tuple(self.parents(n))
- # If any of its parents are descendants, it's not a root.
- if (p[0] in descendants) or (p[1] in descendants):
- roots.remove(n)
- else:
- p = tuple(self.parents(n))
- # A node is a descendant if either of its parents are
- # descendants. (We seeded the dependents list with the roots
- # up there, remember?)
- if (p[0] in descendants) or (p[1] in descendants):
- descendants.add(n)
- isdescendant = True
- if isdescendant and ((ancestors is None) or (n in ancestors)):
- # Only include nodes that are both descendants and ancestors.
- orderedout.append(n)
- if (ancestors is not None) and (n in heads):
- # We're trying to figure out which heads are reachable
- # from roots.
- # Mark this head as having been reached
- heads[n] = True
- elif ancestors is None:
- # Otherwise, we're trying to discover the heads.
- # Assume this is a head because if it isn't, the next step
- # will eventually remove it.
- heads[n] = True
- # But, obviously its parents aren't.
- for p in self.parents(n):
- heads.pop(p, None)
- heads = [head for head, flag in pycompat.iteritems(heads) if flag]
- roots = list(roots)
- assert orderedout
- assert roots
- assert heads
- return (orderedout, roots, heads)
-
- def headrevs(self, revs=None):
- if revs is None:
- try:
- return self.index.headrevs()
- except AttributeError:
- return self._headrevs()
- if rustdagop is not None and self.index.rust_ext_compat:
- return rustdagop.headrevs(self.index, revs)
- return dagop.headrevs(revs, self._uncheckedparentrevs)
-
- def computephases(self, roots):
- return self.index.computephasesmapsets(roots)
-
- def _headrevs(self):
- count = len(self)
- if not count:
- return [nullrev]
- # we won't iter over filtered rev so nobody is a head at start
- ishead = [0] * (count + 1)
- index = self.index
- for r in self:
- ishead[r] = 1 # I may be an head
- e = index[r]
- ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
- return [r for r, val in enumerate(ishead) if val]
-
- def heads(self, start=None, stop=None):
- """return the list of all nodes that have no children
-
- if start is specified, only heads that are descendants of
- start will be returned
- if stop is specified, it will consider all the revs from stop
- as if they had no children
- """
- if start is None and stop is None:
- if not len(self):
- return [self.nullid]
- return [self.node(r) for r in self.headrevs()]
-
- if start is None:
- start = nullrev
- else:
- start = self.rev(start)
-
- stoprevs = {self.rev(n) for n in stop or []}
-
- revs = dagop.headrevssubset(
- self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
- )
-
- return [self.node(rev) for rev in revs]
-
- def children(self, node):
- """find the children of a given node"""
- c = []
- p = self.rev(node)
- for r in self.revs(start=p + 1):
- prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
- if prevs:
- for pr in prevs:
- if pr == p:
- c.append(self.node(r))
- elif p == nullrev:
- c.append(self.node(r))
- return c
-
- def commonancestorsheads(self, a, b):
- """calculate all the heads of the common ancestors of nodes a and b"""
- a, b = self.rev(a), self.rev(b)
- ancs = self._commonancestorsheads(a, b)
- return pycompat.maplist(self.node, ancs)
-
- def _commonancestorsheads(self, *revs):
- """calculate all the heads of the common ancestors of revs"""
- try:
- ancs = self.index.commonancestorsheads(*revs)
- except (AttributeError, OverflowError): # C implementation failed
- ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
- return ancs
-
- def isancestor(self, a, b):
- """return True if node a is an ancestor of node b
-
- A revision is considered an ancestor of itself."""
- a, b = self.rev(a), self.rev(b)
- return self.isancestorrev(a, b)
-
- def isancestorrev(self, a, b):
- """return True if revision a is an ancestor of revision b
-
- A revision is considered an ancestor of itself.
-
- The implementation of this is trivial but the use of
- reachableroots is not."""
- if a == nullrev:
- return True
- elif a == b:
- return True
- elif a > b:
- return False
- return bool(self.reachableroots(a, [b], [a], includepath=False))
-
- def reachableroots(self, minroot, heads, roots, includepath=False):
- """return (heads(::(<roots> and <roots>::<heads>)))
-
- If includepath is True, return (<roots>::<heads>)."""
- try:
- return self.index.reachableroots2(
- minroot, heads, roots, includepath
- )
- except AttributeError:
- return dagop._reachablerootspure(
- self.parentrevs, minroot, roots, heads, includepath
- )
-
- def ancestor(self, a, b):
- """calculate the "best" common ancestor of nodes a and b"""
-
- a, b = self.rev(a), self.rev(b)
- try:
- ancs = self.index.ancestors(a, b)
- except (AttributeError, OverflowError):
- ancs = ancestor.ancestors(self.parentrevs, a, b)
- if ancs:
- # choose a consistent winner when there's a tie
- return min(map(self.node, ancs))
- return self.nullid
-
- def _match(self, id):
- if isinstance(id, int):
- # rev
- return self.node(id)
- if len(id) == self.nodeconstants.nodelen:
- # possibly a binary node
- # odds of a binary node being all hex in ASCII are 1 in 10**25
- try:
- node = id
- self.rev(node) # quick search the index
- return node
- except error.LookupError:
- pass # may be partial hex id
- try:
- # str(rev)
- rev = int(id)
- if b"%d" % rev != id:
- raise ValueError
- if rev < 0:
- rev = len(self) + rev
- if rev < 0 or rev >= len(self):
- raise ValueError
- return self.node(rev)
- except (ValueError, OverflowError):
- pass
- if len(id) == 2 * self.nodeconstants.nodelen:
- try:
- # a full hex nodeid?
- node = bin(id)
- self.rev(node)
- return node
- except (TypeError, error.LookupError):
- pass
-
- def _partialmatch(self, id):
- # we don't care wdirfilenodeids as they should be always full hash
- maybewdir = self.nodeconstants.wdirhex.startswith(id)
- ambiguous = False
- try:
- partial = self.index.partialmatch(id)
- if partial and self.hasnode(partial):
- if maybewdir:
- # single 'ff...' match in radix tree, ambiguous with wdir
- ambiguous = True
- else:
- return partial
- elif maybewdir:
- # no 'ff...' match in radix tree, wdir identified
- raise error.WdirUnsupported
- else:
- return None
- except error.RevlogError:
- # parsers.c radix tree lookup gave multiple matches
- # fast path: for unfiltered changelog, radix tree is accurate
- if not getattr(self, 'filteredrevs', None):
- ambiguous = True
- # fall through to slow path that filters hidden revisions
- except (AttributeError, ValueError):
- # we are pure python, or key was too short to search radix tree
- pass
- if ambiguous:
- raise error.AmbiguousPrefixLookupError(
- id, self.display_id, _(b'ambiguous identifier')
+ if rev == censorrev:
+ newrl.addrawrevision(
+ tombstone,
+ tr,
+ rl.linkrev(censorrev),
+ p1,
+ p2,
+ censornode,
+ constants.REVIDX_ISCENSORED,
)
- if id in self._pcache:
- return self._pcache[id]
-
- if len(id) <= 40:
- try:
- # hex(node)[:...]
- l = len(id) // 2 # grab an even number of digits
- prefix = bin(id[: l * 2])
- nl = [e[7] for e in self.index if e[7].startswith(prefix)]
- nl = [
- n for n in nl if hex(n).startswith(id) and self.hasnode(n)
- ]
- if self.nodeconstants.nullhex.startswith(id):
- nl.append(self.nullid)
- if len(nl) > 0:
- if len(nl) == 1 and not maybewdir:
- self._pcache[id] = nl[0]
- return nl[0]
- raise error.AmbiguousPrefixLookupError(
- id, self.display_id, _(b'ambiguous identifier')
- )
- if maybewdir:
- raise error.WdirUnsupported
- return None
- except TypeError:
- pass
-
- def lookup(self, id):
- """locate a node based on:
- - revision number or str(revision number)
- - nodeid or subset of hex nodeid
- """
- n = self._match(id)
- if n is not None:
- return n
- n = self._partialmatch(id)
- if n:
- return n
-
- raise error.LookupError(id, self.display_id, _(b'no match found'))
-
- def shortest(self, node, minlength=1):
- """Find the shortest unambiguous prefix that matches node."""
-
- def isvalid(prefix):
- try:
- matchednode = self._partialmatch(prefix)
- except error.AmbiguousPrefixLookupError:
- return False
- except error.WdirUnsupported:
- # single 'ff...' match
- return True
- if matchednode is None:
- raise error.LookupError(node, self.display_id, _(b'no node'))
- return True
-
- def maybewdir(prefix):
- return all(c == b'f' for c in pycompat.iterbytestr(prefix))
-
- hexnode = hex(node)
-
- def disambiguate(hexnode, minlength):
- """Disambiguate against wdirid."""
- for length in range(minlength, len(hexnode) + 1):
- prefix = hexnode[:length]
- if not maybewdir(prefix):
- return prefix
-
- if not getattr(self, 'filteredrevs', None):
- try:
- length = max(self.index.shortest(node), minlength)
- return disambiguate(hexnode, length)
- except error.RevlogError:
- if node != self.nodeconstants.wdirid:
- raise error.LookupError(
- node, self.display_id, _(b'no node')
- )
- except AttributeError:
- # Fall through to pure code
- pass
-
- if node == self.nodeconstants.wdirid:
- for length in range(minlength, len(hexnode) + 1):
- prefix = hexnode[:length]
- if isvalid(prefix):
- return prefix
-
- for length in range(minlength, len(hexnode) + 1):
- prefix = hexnode[:length]
- if isvalid(prefix):
- return disambiguate(hexnode, length)
-
- def cmp(self, node, text):
- """compare text with a given file revision
-
- returns True if text is different than what is stored.
- """
- p1, p2 = self.parents(node)
- return storageutil.hashrevisionsha1(text, p1, p2) != node
-
- def _cachesegment(self, offset, data):
- """Add a segment to the revlog cache.
-
- Accepts an absolute offset and the data that is at that location.
- """
- o, d = self._chunkcache
- # try to add to existing cache
- if o + len(d) == offset and len(d) + len(data) < _chunksize:
- self._chunkcache = o, d + data
- else:
- self._chunkcache = offset, data
-
- def _readsegment(self, offset, length, df=None):
- """Load a segment of raw data from the revlog.
-
- Accepts an absolute offset, length to read, and an optional existing
- file handle to read from.
-
- If an existing file handle is passed, it will be seeked and the
- original seek position will NOT be restored.
-
- Returns a str or buffer of raw byte data.
-
- Raises if the requested number of bytes could not be read.
- """
- # Cache data both forward and backward around the requested
- # data, in a fixed size window. This helps speed up operations
- # involving reading the revlog backwards.
- cachesize = self._chunkcachesize
- realoffset = offset & ~(cachesize - 1)
- reallength = (
- (offset + length + cachesize) & ~(cachesize - 1)
- ) - realoffset
- with self._datareadfp(df) as df:
- df.seek(realoffset)
- d = df.read(reallength)
-
- self._cachesegment(realoffset, d)
- if offset != realoffset or reallength != length:
- startoffset = offset - realoffset
- if len(d) - startoffset < length:
- filename = self._indexfile if self._inline else self._datafile
- got = len(d) - startoffset
- m = PARTIAL_READ_MSG % (filename, length, offset, got)
- raise error.RevlogError(m)
- return util.buffer(d, startoffset, length)
-
- if len(d) < length:
- filename = self._indexfile if self._inline else self._datafile
- got = len(d) - startoffset
- m = PARTIAL_READ_MSG % (filename, length, offset, got)
- raise error.RevlogError(m)
-
- return d
-
- def _getsegment(self, offset, length, df=None):
- """Obtain a segment of raw data from the revlog.
-
- Accepts an absolute offset, length of bytes to obtain, and an
- optional file handle to the already-opened revlog. If the file
- handle is used, it's original seek position will not be preserved.
-
- Requests for data may be returned from a cache.
-
- Returns a str or a buffer instance of raw byte data.
- """
- o, d = self._chunkcache
- l = len(d)
-
- # is it in the cache?
- cachestart = offset - o
- cacheend = cachestart + length
- if cachestart >= 0 and cacheend <= l:
- if cachestart == 0 and cacheend == l:
- return d # avoid a copy
- return util.buffer(d, cachestart, cacheend - cachestart)
-
- return self._readsegment(offset, length, df=df)
-
- def _getsegmentforrevs(self, startrev, endrev, df=None):
- """Obtain a segment of raw data corresponding to a range of revisions.
-
- Accepts the start and end revisions and an optional already-open
- file handle to be used for reading. If the file handle is read, its
- seek position will not be preserved.
-
- Requests for data may be satisfied by a cache.
-
- Returns a 2-tuple of (offset, data) for the requested range of
- revisions. Offset is the integer offset from the beginning of the
- revlog and data is a str or buffer of the raw byte data.
-
- Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
- to determine where each revision's data begins and ends.
- """
- # Inlined self.start(startrev) & self.end(endrev) for perf reasons
- # (functions are expensive).
- index = self.index
- istart = index[startrev]
- start = int(istart[0] >> 16)
- if startrev == endrev:
- end = start + istart[1]
- else:
- iend = index[endrev]
- end = int(iend[0] >> 16) + iend[1]
-
- if self._inline:
- start += (startrev + 1) * self.index.entry_size
- end += (endrev + 1) * self.index.entry_size
- length = end - start
-
- return start, self._getsegment(start, length, df=df)
-
- def _chunk(self, rev, df=None):
- """Obtain a single decompressed chunk for a revision.
-
- Accepts an integer revision and an optional already-open file handle
- to be used for reading. If used, the seek position of the file will not
- be preserved.
-
- Returns a str holding uncompressed data for the requested revision.
- """
- compression_mode = self.index[rev][10]
- data = self._getsegmentforrevs(rev, rev, df=df)[1]
- if compression_mode == COMP_MODE_PLAIN:
- return data
- elif compression_mode == COMP_MODE_DEFAULT:
- return self._decompressor(data)
- elif compression_mode == COMP_MODE_INLINE:
- return self.decompress(data)
- else:
- msg = 'unknown compression mode %d'
- msg %= compression_mode
- raise error.RevlogError(msg)
-
- def _chunks(self, revs, df=None, targetsize=None):
- """Obtain decompressed chunks for the specified revisions.
-
- Accepts an iterable of numeric revisions that are assumed to be in
- ascending order. Also accepts an optional already-open file handle
- to be used for reading. If used, the seek position of the file will
- not be preserved.
-
- This function is similar to calling ``self._chunk()`` multiple times,
- but is faster.
-
- Returns a list with decompressed data for each requested revision.
- """
- if not revs:
- return []
- start = self.start
- length = self.length
- inline = self._inline
- iosize = self.index.entry_size
- buffer = util.buffer
-
- l = []
- ladd = l.append
-
- if not self._withsparseread:
- slicedchunks = (revs,)
- else:
- slicedchunks = deltautil.slicechunk(
- self, revs, targetsize=targetsize
- )
-
- for revschunk in slicedchunks:
- firstrev = revschunk[0]
- # Skip trailing revisions with empty diff
- for lastrev in revschunk[::-1]:
- if length(lastrev) != 0:
- break
-
- try:
- offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
- except OverflowError:
- # issue4215 - we can't cache a run of chunks greater than
- # 2G on Windows
- return [self._chunk(rev, df=df) for rev in revschunk]
-
- decomp = self.decompress
- # self._decompressor might be None, but will not be used in that case
- def_decomp = self._decompressor
- for rev in revschunk:
- chunkstart = start(rev)
- if inline:
- chunkstart += (rev + 1) * iosize
- chunklength = length(rev)
- comp_mode = self.index[rev][10]
- c = buffer(data, chunkstart - offset, chunklength)
- if comp_mode == COMP_MODE_PLAIN:
- ladd(c)
- elif comp_mode == COMP_MODE_INLINE:
- ladd(decomp(c))
- elif comp_mode == COMP_MODE_DEFAULT:
- ladd(def_decomp(c))
- else:
- msg = 'unknown compression mode %d'
- msg %= comp_mode
- raise error.RevlogError(msg)
-
- return l
-
- def _chunkclear(self):
- """Clear the raw chunk cache."""
- self._chunkcache = (0, b'')
-
- def deltaparent(self, rev):
- """return deltaparent of the given revision"""
- base = self.index[rev][3]
- if base == rev:
- return nullrev
- elif self._generaldelta:
- return base
- else:
- return rev - 1
-
- def issnapshot(self, rev):
- """tells whether rev is a snapshot"""
- if not self._sparserevlog:
- return self.deltaparent(rev) == nullrev
- elif util.safehasattr(self.index, b'issnapshot'):
- # directly assign the method to cache the testing and access
- self.issnapshot = self.index.issnapshot
- return self.issnapshot(rev)
- if rev == nullrev:
- return True
- entry = self.index[rev]
- base = entry[3]
- if base == rev:
- return True
- if base == nullrev:
- return True
- p1 = entry[5]
- p2 = entry[6]
- if base == p1 or base == p2:
- return False
- return self.issnapshot(base)
-
- def snapshotdepth(self, rev):
- """number of snapshot in the chain before this one"""
- if not self.issnapshot(rev):
- raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
- return len(self._deltachain(rev)[0]) - 1
-
- def revdiff(self, rev1, rev2):
- """return or calculate a delta between two revisions
-
- The delta calculated is in binary form and is intended to be written to
- revlog data directly. So this function needs raw revision data.
- """
- if rev1 != nullrev and self.deltaparent(rev2) == rev1:
- return bytes(self._chunk(rev2))
-
- return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
-
- def _processflags(self, text, flags, operation, raw=False):
- """deprecated entry point to access flag processors"""
- msg = b'_processflag(...) use the specialized variant'
- util.nouideprecwarn(msg, b'5.2', stacklevel=2)
- if raw:
- return text, flagutil.processflagsraw(self, text, flags)
- elif operation == b'read':
- return flagutil.processflagsread(self, text, flags)
- else: # write operation
- return flagutil.processflagswrite(self, text, flags)
-
- def revision(self, nodeorrev, _df=None, raw=False):
- """return an uncompressed revision of a given node or revision
- number.
-
- _df - an existing file handle to read from. (internal-only)
- raw - an optional argument specifying if the revision data is to be
- treated as raw data when applying flag transforms. 'raw' should be set
- to True when generating changegroups or in debug commands.
- """
- if raw:
- msg = (
- b'revlog.revision(..., raw=True) is deprecated, '
- b'use revlog.rawdata(...)'
- )
- util.nouideprecwarn(msg, b'5.2', stacklevel=2)
- return self._revisiondata(nodeorrev, _df, raw=raw)
-
- def sidedata(self, nodeorrev, _df=None):
- """a map of extra data related to the changeset but not part of the hash
-
- This function currently returns a dictionary. However, a more advanced
- mapping object will likely be used in the future for more
- efficient/lazy code.
- """
- # deal with <nodeorrev> argument type
- if isinstance(nodeorrev, int):
- rev = nodeorrev
- else:
- rev = self.rev(nodeorrev)
- return self._sidedata(rev)
+ if newrl.deltaparent(rev) != nullrev:
+ m = _(b'censored revision stored as delta; cannot censor')
+ h = _(
+ b'censoring of revlogs is not fully implemented;'
+ b' please report this bug'
+ )
+ raise error.Abort(m, hint=h)
+ continue
- def _revisiondata(self, nodeorrev, _df=None, raw=False):
- # deal with <nodeorrev> argument type
- if isinstance(nodeorrev, int):
- rev = nodeorrev
- node = self.node(rev)
- else:
- node = nodeorrev
- rev = None
-
- # fast path the special `nullid` rev
- if node == self.nullid:
- return b""
-
- # ``rawtext`` is the text as stored inside the revlog. Might be the
- # revision or might need to be processed to retrieve the revision.
- rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
-
- if raw and validated:
- # if we don't want to process the raw text and that raw
- # text is cached, we can exit early.
- return rawtext
- if rev is None:
- rev = self.rev(node)
- # the revlog's flag for this revision
- # (usually alter its state or content)
- flags = self.flags(rev)
-
- if validated and flags == REVIDX_DEFAULT_FLAGS:
- # no extra flags set, no flag processor runs, text = rawtext
- return rawtext
-
- if raw:
- validatehash = flagutil.processflagsraw(self, rawtext, flags)
- text = rawtext
+ if rl.iscensored(rev):
+ if rl.deltaparent(rev) != nullrev:
+ m = _(
+ b'cannot censor due to censored '
+ b'revision having delta stored'
+ )
+ raise error.Abort(m)
+ rawtext = rl._chunk(rev)
else:
- r = flagutil.processflagsread(self, rawtext, flags)
- text, validatehash = r
- if validatehash:
- self.checkhash(text, node, rev=rev)
- if not validated:
- self._revisioncache = (node, rev, rawtext)
-
- return text
-
- def _rawtext(self, node, rev, _df=None):
- """return the possibly unvalidated rawtext for a revision
-
- returns (rev, rawtext, validated)
- """
-
- # revision in the cache (could be useful to apply delta)
- cachedrev = None
- # An intermediate text to apply deltas to
- basetext = None
-
- # Check if we have the entry in cache
- # The cache entry looks like (node, rev, rawtext)
- if self._revisioncache:
- if self._revisioncache[0] == node:
- return (rev, self._revisioncache[2], True)
- cachedrev = self._revisioncache[1]
-
- if rev is None:
- rev = self.rev(node)
-
- chain, stopped = self._deltachain(rev, stoprev=cachedrev)
- if stopped:
- basetext = self._revisioncache[2]
-
- # drop cache to save memory, the caller is expected to
- # update self._revisioncache after validating the text
- self._revisioncache = None
-
- targetsize = None
- rawsize = self.index[rev][2]
- if 0 <= rawsize:
- targetsize = 4 * rawsize
-
- bins = self._chunks(chain, df=_df, targetsize=targetsize)
- if basetext is None:
- basetext = bytes(bins[0])
- bins = bins[1:]
-
- rawtext = mdiff.patches(basetext, bins)
- del basetext # let us have a chance to free memory early
- return (rev, rawtext, False)
-
- def _sidedata(self, rev):
- """Return the sidedata for a given revision number."""
- index_entry = self.index[rev]
- sidedata_offset = index_entry[8]
- sidedata_size = index_entry[9]
-
- if self._inline:
- sidedata_offset += self.index.entry_size * (1 + rev)
- if sidedata_size == 0:
- return {}
-
- # XXX this need caching, as we do for data
- with self._sidedatareadfp() as sdf:
- if self._docket.sidedata_end < sidedata_offset + sidedata_size:
- filename = self._sidedatafile
- end = self._docket.sidedata_end
- offset = sidedata_offset
- length = sidedata_size
- m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
- raise error.RevlogError(m)
-
- sdf.seek(sidedata_offset, os.SEEK_SET)
- comp_segment = sdf.read(sidedata_size)
-
- if len(comp_segment) < sidedata_size:
- filename = self._sidedatafile
- length = sidedata_size
- offset = sidedata_offset
- got = len(comp_segment)
- m = PARTIAL_READ_MSG % (filename, length, offset, got)
- raise error.RevlogError(m)
-
- comp = self.index[rev][11]
- if comp == COMP_MODE_PLAIN:
- segment = comp_segment
- elif comp == COMP_MODE_DEFAULT:
- segment = self._decompressor(comp_segment)
- elif comp == COMP_MODE_INLINE:
- segment = self.decompress(comp_segment)
- else:
- msg = 'unknown compression mode %d'
- msg %= comp
- raise error.RevlogError(msg)
-
- sidedata = sidedatautil.deserialize_sidedata(segment)
- return sidedata
-
- def rawdata(self, nodeorrev, _df=None):
- """return an uncompressed raw data of a given node or revision number.
-
- _df - an existing file handle to read from. (internal-only)
- """
- return self._revisiondata(nodeorrev, _df, raw=True)
-
- def hash(self, text, p1, p2):
- """Compute a node hash.
-
- Available as a function so that subclasses can replace the hash
- as needed.
- """
- return storageutil.hashrevisionsha1(text, p1, p2)
-
- def checkhash(self, text, node, p1=None, p2=None, rev=None):
- """Check node hash integrity.
-
- Available as a function so that subclasses can extend hash mismatch
- behaviors as needed.
- """
- try:
- if p1 is None and p2 is None:
- p1, p2 = self.parents(node)
- if node != self.hash(text, p1, p2):
- # Clear the revision cache on hash failure. The revision cache
- # only stores the raw revision and clearing the cache does have
- # the side-effect that we won't have a cache hit when the raw
- # revision data is accessed. But this case should be rare and
- # it is extra work to teach the cache about the hash
- # verification state.
- if self._revisioncache and self._revisioncache[0] == node:
- self._revisioncache = None
-
- revornode = rev
- if revornode is None:
- revornode = templatefilters.short(hex(node))
- raise error.RevlogError(
- _(b"integrity check failed on %s:%s")
- % (self.display_id, pycompat.bytestr(revornode))
- )
- except error.RevlogError:
- if self._censorable and storageutil.iscensoredtext(text):
- raise error.CensoredNodeError(self.display_id, node, text)
- raise
-
- def _enforceinlinesize(self, tr):
- """Check if the revlog is too big for inline and convert if so.
-
- This should be called after revisions are added to the revlog. If the
- revlog has grown too large to be an inline revlog, it will convert it
- to use multiple index and data files.
- """
- tiprev = len(self) - 1
- total_size = self.start(tiprev) + self.length(tiprev)
- if not self._inline or total_size < _maxinline:
- return
-
- troffset = tr.findoffset(self._indexfile)
- if troffset is None:
- raise error.RevlogError(
- _(b"%s not found in the transaction") % self._indexfile
- )
- trindex = 0
- tr.add(self._datafile, 0)
+ rawtext = rl.rawdata(rev)
- existing_handles = False
- if self._writinghandles is not None:
- existing_handles = True
- fp = self._writinghandles[0]
- fp.flush()
- fp.close()
- # We can't use the cached file handle after close(). So prevent
- # its usage.
- self._writinghandles = None
-
- new_dfh = self._datafp(b'w+')
- new_dfh.truncate(0) # drop any potentially existing data
- try:
- with self._indexfp() as read_ifh:
- for r in self:
- new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
- if troffset <= self.start(r) + r * self.index.entry_size:
- trindex = r
- new_dfh.flush()
-
- with self.__index_new_fp() as fp:
- self._format_flags &= ~FLAG_INLINE_DATA
- self._inline = False
- for i in self:
- e = self.index.entry_binary(i)
- if i == 0 and self._docket is None:
- header = self._format_flags | self._format_version
- header = self.index.pack_header(header)
- e = header + e
- fp.write(e)
- if self._docket is not None:
- self._docket.index_end = fp.tell()
-
- # There is a small transactional race here. If the rename of
- # the index fails, we should remove the datafile. It is more
- # important to ensure that the data file is not truncated
- # when the index is replaced as otherwise data is lost.
- tr.replace(self._datafile, self.start(trindex))
-
- # the temp file replaces the real index when we exit the context
- # manager
-
- tr.replace(self._indexfile, trindex * self.index.entry_size)
- nodemaputil.setup_persistent_nodemap(tr, self)
- self._chunkclear()
-
- if existing_handles:
- # switched from inline to conventional reopen the index
- ifh = self.__index_write_fp()
- self._writinghandles = (ifh, new_dfh, None)
- new_dfh = None
- finally:
- if new_dfh is not None:
- new_dfh.close()
-
- def _nodeduplicatecallback(self, transaction, node):
- """called when trying to add a node already stored."""
-
- @contextlib.contextmanager
- def _writing(self, transaction):
- if self._trypending:
- msg = b'try to write in a `trypending` revlog: %s'
- msg %= self.display_id
- raise error.ProgrammingError(msg)
- if self._writinghandles is not None:
- yield
- else:
- ifh = dfh = sdfh = None
- try:
- r = len(self)
- # opening the data file.
- dsize = 0
- if r:
- dsize = self.end(r - 1)
- dfh = None
- if not self._inline:
- try:
- dfh = self._datafp(b"r+")
- if self._docket is None:
- dfh.seek(0, os.SEEK_END)
- else:
- dfh.seek(self._docket.data_end, os.SEEK_SET)
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- dfh = self._datafp(b"w+")
- transaction.add(self._datafile, dsize)
- if self._sidedatafile is not None:
- try:
- sdfh = self.opener(self._sidedatafile, mode=b"r+")
- dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- sdfh = self.opener(self._sidedatafile, mode=b"w+")
- transaction.add(
- self._sidedatafile, self._docket.sidedata_end
- )
-
- # opening the index file.
- isize = r * self.index.entry_size
- ifh = self.__index_write_fp()
- if self._inline:
- transaction.add(self._indexfile, dsize + isize)
- else:
- transaction.add(self._indexfile, isize)
- # exposing all file handle for writing.
- self._writinghandles = (ifh, dfh, sdfh)
- yield
- if self._docket is not None:
- self._write_docket(transaction)
- finally:
- self._writinghandles = None
- if dfh is not None:
- dfh.close()
- if sdfh is not None:
- sdfh.close()
- # closing the index file last to avoid exposing referent to
- # potential unflushed data content.
- if ifh is not None:
- ifh.close()
-
- def _write_docket(self, transaction):
- """write the current docket on disk
-
- Exists as a method to help the changelog implement transaction logic
-
- We could also imagine using the same transaction logic for all revlogs
- since dockets are cheap."""
- self._docket.write(transaction)
-
- def addrevision(
- self,
- text,
- transaction,
- link,
- p1,
- p2,
- cachedelta=None,
- node=None,
- flags=REVIDX_DEFAULT_FLAGS,
- deltacomputer=None,
- sidedata=None,
- ):
- """add a revision to the log
-
- text - the revision data to add
- transaction - the transaction object used for rollback
- link - the linkrev data to add
- p1, p2 - the parent nodeids of the revision
- cachedelta - an optional precomputed delta
- node - nodeid of revision; typically node is not specified, and it is
- computed by default as hash(text, p1, p2), however subclasses might
- use different hashing method (and override checkhash() in such case)
- flags - the known flags to set on the revision
- deltacomputer - an optional deltacomputer instance shared between
- multiple calls
- """
- if link == nullrev:
- raise error.RevlogError(
- _(b"attempted to add linkrev -1 to %s") % self.display_id
- )
-
- if sidedata is None:
- sidedata = {}
- elif sidedata and not self.hassidedata:
- raise error.ProgrammingError(
- _(b"trying to add sidedata to a revlog who don't support them")
- )
-
- if flags:
- node = node or self.hash(text, p1, p2)
-
- rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
-
- # If the flag processor modifies the revision data, ignore any provided
- # cachedelta.
- if rawtext != text:
- cachedelta = None
-
- if len(rawtext) > _maxentrysize:
- raise error.RevlogError(
- _(
- b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
- )
- % (self.display_id, len(rawtext))
- )
-
- node = node or self.hash(rawtext, p1, p2)
- rev = self.index.get_rev(node)
- if rev is not None:
- return rev
-
- if validatehash:
- self.checkhash(rawtext, node, p1=p1, p2=p2)
-
- return self.addrawrevision(
- rawtext,
- transaction,
- link,
- p1,
- p2,
- node,
- flags,
- cachedelta=cachedelta,
- deltacomputer=deltacomputer,
- sidedata=sidedata,
+ newrl.addrawrevision(
+ rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
)
- def addrawrevision(
- self,
- rawtext,
- transaction,
- link,
- p1,
- p2,
- node,
- flags,
- cachedelta=None,
- deltacomputer=None,
- sidedata=None,
- ):
- """add a raw revision with known flags, node and parents
- useful when reusing a revision not stored in this revlog (ex: received
- over wire, or read from an external bundle).
- """
- with self._writing(transaction):
- return self._addrevision(
- node,
- rawtext,
- transaction,
- link,
- p1,
- p2,
- flags,
- cachedelta,
- deltacomputer=deltacomputer,
- sidedata=sidedata,
- )
-
- def compress(self, data):
- """Generate a possibly-compressed representation of data."""
- if not data:
- return b'', data
-
- compressed = self._compressor.compress(data)
-
- if compressed:
- # The revlog compressor added the header in the returned data.
- return b'', compressed
-
- if data[0:1] == b'\0':
- return b'', data
- return b'u', data
-
- def decompress(self, data):
- """Decompress a revlog chunk.
-
- The chunk is expected to begin with a header identifying the
- format type so it can be routed to an appropriate decompressor.
- """
- if not data:
- return data
-
- # Revlogs are read much more frequently than they are written and many
- # chunks only take microseconds to decompress, so performance is
- # important here.
- #
- # We can make a few assumptions about revlogs:
- #
- # 1) the majority of chunks will be compressed (as opposed to inline
- # raw data).
- # 2) decompressing *any* data will likely be at least 10x slower than
- # returning raw inline data.
- # 3) we want to prioritize common and officially supported compression
- # engines
- #
- # It follows that we want to optimize for "decompress compressed data
- # when encoded with common and officially supported compression engines"
- # case over "raw data" and "data encoded by less common or non-official
- # compression engines." That is why we have the inline lookup first
- # followed by the compengines lookup.
- #
- # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
- # compressed chunks. And this matters for changelog and manifest reads.
- t = data[0:1]
-
- if t == b'x':
- try:
- return _zlibdecompress(data)
- except zlib.error as e:
- raise error.RevlogError(
- _(b'revlog decompress error: %s')
- % stringutil.forcebytestr(e)
- )
- # '\0' is more common than 'u' so it goes first.
- elif t == b'\0':
- return data
- elif t == b'u':
- return util.buffer(data, 1)
-
- compressor = self._get_decompressor(t)
-
- return compressor.decompress(data)
-
- def _addrevision(
- self,
- node,
- rawtext,
- transaction,
- link,
- p1,
- p2,
- flags,
- cachedelta,
- alwayscache=False,
- deltacomputer=None,
- sidedata=None,
- ):
- """internal function to add revisions to the log
-
- see addrevision for argument descriptions.
-
- note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
-
- if "deltacomputer" is not provided or None, a defaultdeltacomputer will
- be used.
-
- invariants:
- - rawtext is optional (can be None); if not set, cachedelta must be set.
- if both are set, they must correspond to each other.
- """
- if node == self.nullid:
- raise error.RevlogError(
- _(b"%s: attempt to add null revision") % self.display_id
- )
- if (
- node == self.nodeconstants.wdirid
- or node in self.nodeconstants.wdirfilenodeids
- ):
- raise error.RevlogError(
- _(b"%s: attempt to add wdir revision") % self.display_id
- )
- if self._writinghandles is None:
- msg = b'adding revision outside `revlog._writing` context'
- raise error.ProgrammingError(msg)
-
- if self._inline:
- fh = self._writinghandles[0]
- else:
- fh = self._writinghandles[1]
-
- btext = [rawtext]
-
- curr = len(self)
- prev = curr - 1
-
- offset = self._get_data_offset(prev)
-
- if self._concurrencychecker:
- ifh, dfh, sdfh = self._writinghandles
- # XXX no checking for the sidedata file
- if self._inline:
- # offset is "as if" it were in the .d file, so we need to add on
- # the size of the entry metadata.
- self._concurrencychecker(
- ifh, self._indexfile, offset + curr * self.index.entry_size
- )
- else:
- # Entries in the .i are a consistent size.
- self._concurrencychecker(
- ifh, self._indexfile, curr * self.index.entry_size
- )
- self._concurrencychecker(dfh, self._datafile, offset)
-
- p1r, p2r = self.rev(p1), self.rev(p2)
-
- # full versions are inserted when the needed deltas
- # become comparable to the uncompressed text
- if rawtext is None:
- # need rawtext size, before changed by flag processors, which is
- # the non-raw size. use revlog explicitly to avoid filelog's extra
- # logic that might remove metadata size.
- textlen = mdiff.patchedsize(
- revlog.size(self, cachedelta[0]), cachedelta[1]
- )
- else:
- textlen = len(rawtext)
-
- if deltacomputer is None:
- deltacomputer = deltautil.deltacomputer(self)
-
- revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
-
- deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
-
- compression_mode = COMP_MODE_INLINE
- if self._docket is not None:
- h, d = deltainfo.data
- if not h and not d:
- # no data to store at all... declare them uncompressed
- compression_mode = COMP_MODE_PLAIN
- elif not h:
- t = d[0:1]
- if t == b'\0':
- compression_mode = COMP_MODE_PLAIN
- elif t == self._docket.default_compression_header:
- compression_mode = COMP_MODE_DEFAULT
- elif h == b'u':
- # we have a more efficient way to declare uncompressed
- h = b''
- compression_mode = COMP_MODE_PLAIN
- deltainfo = deltautil.drop_u_compression(deltainfo)
-
- sidedata_compression_mode = COMP_MODE_INLINE
- if sidedata and self.hassidedata:
- sidedata_compression_mode = COMP_MODE_PLAIN
- serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
- sidedata_offset = self._docket.sidedata_end
- h, comp_sidedata = self.compress(serialized_sidedata)
- if (
- h != b'u'
- and comp_sidedata[0:1] != b'\0'
- and len(comp_sidedata) < len(serialized_sidedata)
- ):
- assert not h
- if (
- comp_sidedata[0:1]
- == self._docket.default_compression_header
- ):
- sidedata_compression_mode = COMP_MODE_DEFAULT
- serialized_sidedata = comp_sidedata
- else:
- sidedata_compression_mode = COMP_MODE_INLINE
- serialized_sidedata = comp_sidedata
- else:
- serialized_sidedata = b""
- # Don't store the offset if the sidedata is empty, that way
- # we can easily detect empty sidedata and they will be no different
- # than ones we manually add.
- sidedata_offset = 0
-
- e = (
- offset_type(offset, flags),
- deltainfo.deltalen,
- textlen,
- deltainfo.base,
- link,
- p1r,
- p2r,
- node,
- sidedata_offset,
- len(serialized_sidedata),
- compression_mode,
- sidedata_compression_mode,
- )
-
- self.index.append(e)
- entry = self.index.entry_binary(curr)
- if curr == 0 and self._docket is None:
- header = self._format_flags | self._format_version
- header = self.index.pack_header(header)
- entry = header + entry
- self._writeentry(
- transaction,
- entry,
- deltainfo.data,
- link,
- offset,
- serialized_sidedata,
- sidedata_offset,
- )
-
- rawtext = btext[0]
-
- if alwayscache and rawtext is None:
- rawtext = deltacomputer.buildtext(revinfo, fh)
-
- if type(rawtext) == bytes: # only accept immutable objects
- self._revisioncache = (node, curr, rawtext)
- self._chainbasecache[curr] = deltainfo.chainbase
- return curr
-
- def _get_data_offset(self, prev):
- """Returns the current offset in the (in-transaction) data file.
- Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
- file to store that information: since sidedata can be rewritten to the
- end of the data file within a transaction, you can have cases where, for
- example, rev `n` does not have sidedata while rev `n - 1` does, leading
- to `n - 1`'s sidedata being written after `n`'s data.
-
- TODO cache this in a docket file before getting out of experimental."""
- if self._docket is None:
- return self.end(prev)
- else:
- return self._docket.data_end
-
- def _writeentry(
- self, transaction, entry, data, link, offset, sidedata, sidedata_offset
- ):
- # Files opened in a+ mode have inconsistent behavior on various
- # platforms. Windows requires that a file positioning call be made
- # when the file handle transitions between reads and writes. See
- # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
- # platforms, Python or the platform itself can be buggy. Some versions
- # of Solaris have been observed to not append at the end of the file
- # if the file was seeked to before the end. See issue4943 for more.
- #
- # We work around this issue by inserting a seek() before writing.
- # Note: This is likely not necessary on Python 3. However, because
- # the file handle is reused for reads and may be seeked there, we need
- # to be careful before changing this.
- if self._writinghandles is None:
- msg = b'adding revision outside `revlog._writing` context'
- raise error.ProgrammingError(msg)
- ifh, dfh, sdfh = self._writinghandles
- if self._docket is None:
- ifh.seek(0, os.SEEK_END)
- else:
- ifh.seek(self._docket.index_end, os.SEEK_SET)
- if dfh:
- if self._docket is None:
- dfh.seek(0, os.SEEK_END)
- else:
- dfh.seek(self._docket.data_end, os.SEEK_SET)
- if sdfh:
- sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
-
- curr = len(self) - 1
- if not self._inline:
- transaction.add(self._datafile, offset)
- if self._sidedatafile:
- transaction.add(self._sidedatafile, sidedata_offset)
- transaction.add(self._indexfile, curr * len(entry))
- if data[0]:
- dfh.write(data[0])
- dfh.write(data[1])
- if sidedata:
- sdfh.write(sidedata)
- ifh.write(entry)
- else:
- offset += curr * self.index.entry_size
- transaction.add(self._indexfile, offset)
- ifh.write(entry)
- ifh.write(data[0])
- ifh.write(data[1])
- assert not sidedata
- self._enforceinlinesize(transaction)
- if self._docket is not None:
- self._docket.index_end = self._writinghandles[0].tell()
- self._docket.data_end = self._writinghandles[1].tell()
- self._docket.sidedata_end = self._writinghandles[2].tell()
-
- nodemaputil.setup_persistent_nodemap(transaction, self)
-
- def addgroup(
- self,
- deltas,
- linkmapper,
- transaction,
- alwayscache=False,
- addrevisioncb=None,
- duplicaterevisioncb=None,
- ):
- """
- add a delta group
-
- given a set of deltas, add them to the revision log. the
- first delta is against its parent, which should be in our
- log, the rest are against the previous delta.
-
- If ``addrevisioncb`` is defined, it will be called with arguments of
- this revlog and the node that was added.
- """
-
- if self._adding_group:
- raise error.ProgrammingError(b'cannot nest addgroup() calls')
-
- self._adding_group = True
- empty = True
- try:
- with self._writing(transaction):
- deltacomputer = deltautil.deltacomputer(self)
- # loop through our set of deltas
- for data in deltas:
- (
- node,
- p1,
- p2,
- linknode,
- deltabase,
- delta,
- flags,
- sidedata,
- ) = data
- link = linkmapper(linknode)
- flags = flags or REVIDX_DEFAULT_FLAGS
-
- rev = self.index.get_rev(node)
- if rev is not None:
- # this can happen if two branches make the same change
- self._nodeduplicatecallback(transaction, rev)
- if duplicaterevisioncb:
- duplicaterevisioncb(self, rev)
- empty = False
- continue
-
- for p in (p1, p2):
- if not self.index.has_node(p):
- raise error.LookupError(
- p, self.radix, _(b'unknown parent')
- )
-
- if not self.index.has_node(deltabase):
- raise error.LookupError(
- deltabase, self.display_id, _(b'unknown delta base')
- )
-
- baserev = self.rev(deltabase)
-
- if baserev != nullrev and self.iscensored(baserev):
- # if base is censored, delta must be full replacement in a
- # single patch operation
- hlen = struct.calcsize(b">lll")
- oldlen = self.rawsize(baserev)
- newlen = len(delta) - hlen
- if delta[:hlen] != mdiff.replacediffheader(
- oldlen, newlen
- ):
- raise error.CensoredBaseError(
- self.display_id, self.node(baserev)
- )
-
- if not flags and self._peek_iscensored(baserev, delta):
- flags |= REVIDX_ISCENSORED
-
- # We assume consumers of addrevisioncb will want to retrieve
- # the added revision, which will require a call to
- # revision(). revision() will fast path if there is a cache
- # hit. So, we tell _addrevision() to always cache in this case.
- # We're only using addgroup() in the context of changegroup
- # generation so the revision data can always be handled as raw
- # by the flagprocessor.
- rev = self._addrevision(
- node,
- None,
- transaction,
- link,
- p1,
- p2,
- flags,
- (baserev, delta),
- alwayscache=alwayscache,
- deltacomputer=deltacomputer,
- sidedata=sidedata,
- )
-
- if addrevisioncb:
- addrevisioncb(self, rev)
- empty = False
- finally:
- self._adding_group = False
- return not empty
-
- def iscensored(self, rev):
- """Check if a file revision is censored."""
- if not self._censorable:
- return False
-
- return self.flags(rev) & REVIDX_ISCENSORED
-
- def _peek_iscensored(self, baserev, delta):
- """Quickly check if a delta produces a censored revision."""
- if not self._censorable:
- return False
-
- return storageutil.deltaiscensored(delta, baserev, self.rawsize)
-
- def getstrippoint(self, minlink):
- """find the minimum rev that must be stripped to strip the linkrev
-
- Returns a tuple containing the minimum rev and a set of all revs that
- have linkrevs that will be broken by this strip.
- """
- return storageutil.resolvestripinfo(
- minlink,
- len(self) - 1,
- self.headrevs(),
- self.linkrev,
- self.parentrevs,
- )
-
- def strip(self, minlink, transaction):
- """truncate the revlog on the first revision with a linkrev >= minlink
-
- This function is called when we're stripping revision minlink and
- its descendants from the repository.
-
- We have to remove all revisions with linkrev >= minlink, because
- the equivalent changelog revisions will be renumbered after the
- strip.
-
- So we truncate the revlog on the first of these revisions, and
- trust that the caller has saved the revisions that shouldn't be
- removed and that it'll re-add them after this truncation.
- """
- if len(self) == 0:
- return
-
- rev, _ = self.getstrippoint(minlink)
- if rev == len(self):
- return
-
- # first truncate the files on disk
- data_end = self.start(rev)
- if not self._inline:
- transaction.add(self._datafile, data_end)
- end = rev * self.index.entry_size
- else:
- end = data_end + (rev * self.index.entry_size)
-
- if self._sidedatafile:
- sidedata_end = self.sidedata_cut_off(rev)
- transaction.add(self._sidedatafile, sidedata_end)
-
- transaction.add(self._indexfile, end)
- if self._docket is not None:
- # XXX we could leverage the docket while stripping. However, it is
- # not powerful enough at the time of this comment
- self._docket.index_end = end
- self._docket.data_end = data_end
- self._docket.sidedata_end = sidedata_end
- self._docket.write(transaction, stripping=True)
-
- # then reset internal state in memory to forget those revisions
- self._revisioncache = None
- self._chaininfocache = util.lrucachedict(500)
- self._chunkclear()
-
- del self.index[rev:-1]
-
- def checksize(self):
- """Check size of index and data files
-
- return a (dd, di) tuple.
- - dd: extra bytes for the "data" file
- - di: extra bytes for the "index" file
-
- A healthy revlog will return (0, 0).
- """
- expected = 0
- if len(self):
- expected = max(0, self.end(len(self) - 1))
-
- try:
- with self._datafp() as f:
- f.seek(0, io.SEEK_END)
- actual = f.tell()
- dd = actual - expected
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- dd = 0
+ tr.addbackup(rl._indexfile, location=b'store')
+ if not rl._inline:
+ tr.addbackup(rl._datafile, location=b'store')
- try:
- f = self.opener(self._indexfile)
- f.seek(0, io.SEEK_END)
- actual = f.tell()
- f.close()
- s = self.index.entry_size
- i = max(0, actual // s)
- di = actual - (i * s)
- if self._inline:
- databytes = 0
- for r in self:
- databytes += max(0, self.length(r))
- dd = 0
- di = actual - len(self) * s - databytes
- except IOError as inst:
- if inst.errno != errno.ENOENT:
- raise
- di = 0
-
- return (dd, di)
-
- def files(self):
- res = [self._indexfile]
- if not self._inline:
- res.append(self._datafile)
- return res
-
- def emitrevisions(
- self,
- nodes,
- nodesorder=None,
- revisiondata=False,
- assumehaveparentrevisions=False,
- deltamode=repository.CG_DELTAMODE_STD,
- sidedata_helpers=None,
- ):
- if nodesorder not in (b'nodes', b'storage', b'linear', None):
- raise error.ProgrammingError(
- b'unhandled value for nodesorder: %s' % nodesorder
- )
-
- if nodesorder is None and not self._generaldelta:
- nodesorder = b'storage'
-
- if (
- not self._storedeltachains
- and deltamode != repository.CG_DELTAMODE_PREV
- ):
- deltamode = repository.CG_DELTAMODE_FULL
-
- return storageutil.emitrevisions(
- self,
- nodes,
- nodesorder,
- revlogrevisiondelta,
- deltaparentfn=self.deltaparent,
- candeltafn=self.candelta,
- rawsizefn=self.rawsize,
- revdifffn=self.revdiff,
- flagsfn=self.flags,
- deltamode=deltamode,
- revisiondata=revisiondata,
- assumehaveparentrevisions=assumehaveparentrevisions,
- sidedata_helpers=sidedata_helpers,
- )
-
- DELTAREUSEALWAYS = b'always'
- DELTAREUSESAMEREVS = b'samerevs'
- DELTAREUSENEVER = b'never'
-
- DELTAREUSEFULLADD = b'fulladd'
-
- DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
-
- def clone(
- self,
- tr,
- destrevlog,
- addrevisioncb=None,
- deltareuse=DELTAREUSESAMEREVS,
- forcedeltabothparents=None,
- sidedata_helpers=None,
- ):
- """Copy this revlog to another, possibly with format changes.
-
- The destination revlog will contain the same revisions and nodes.
- However, it may not be bit-for-bit identical due to e.g. delta encoding
- differences.
-
- The ``deltareuse`` argument controls how deltas from the existing revlog
- are preserved in the destination revlog. The argument can have the
- following values:
-
- DELTAREUSEALWAYS
- Deltas will always be reused (if possible), even if the destination
- revlog would not select the same revisions for the delta. This is the
- fastest mode of operation.
- DELTAREUSESAMEREVS
- Deltas will be reused if the destination revlog would pick the same
- revisions for the delta. This mode strikes a balance between speed
- and optimization.
- DELTAREUSENEVER
- Deltas will never be reused. This is the slowest mode of execution.
- This mode can be used to recompute deltas (e.g. if the diff/delta
- algorithm changes).
- DELTAREUSEFULLADD
- Revisions will be re-added as if they were new content. This is
- slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
- e.g. large file detection and handling.
-
- Delta computation can be slow, so the choice of delta reuse policy can
- significantly affect run time.
-
- The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
- two extremes. Deltas will be reused if they are appropriate. But if the
- delta could choose a better revision, it will do so. This means if you
- are converting a non-generaldelta revlog to a generaldelta revlog,
- deltas will be recomputed if the delta's parent isn't a parent of the
- revision.
-
- In addition to the delta policy, the ``forcedeltabothparents``
- argument controls whether to force compute deltas against both parents
- for merges. By default, the current default is used.
-
- See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
- `sidedata_helpers`.
- """
- if deltareuse not in self.DELTAREUSEALL:
- raise ValueError(
- _(b'value for deltareuse invalid: %s') % deltareuse
- )
-
- if len(destrevlog):
- raise ValueError(_(b'destination revlog is not empty'))
-
- if getattr(self, 'filteredrevs', None):
- raise ValueError(_(b'source revlog has filtered revisions'))
- if getattr(destrevlog, 'filteredrevs', None):
- raise ValueError(_(b'destination revlog has filtered revisions'))
-
- # lazydelta and lazydeltabase controls whether to reuse a cached delta,
- # if possible.
- oldlazydelta = destrevlog._lazydelta
- oldlazydeltabase = destrevlog._lazydeltabase
- oldamd = destrevlog._deltabothparents
-
- try:
- if deltareuse == self.DELTAREUSEALWAYS:
- destrevlog._lazydeltabase = True
- destrevlog._lazydelta = True
- elif deltareuse == self.DELTAREUSESAMEREVS:
- destrevlog._lazydeltabase = False
- destrevlog._lazydelta = True
- elif deltareuse == self.DELTAREUSENEVER:
- destrevlog._lazydeltabase = False
- destrevlog._lazydelta = False
-
- destrevlog._deltabothparents = forcedeltabothparents or oldamd
-
- self._clone(
- tr,
- destrevlog,
- addrevisioncb,
- deltareuse,
- forcedeltabothparents,
- sidedata_helpers,
- )
-
- finally:
- destrevlog._lazydelta = oldlazydelta
- destrevlog._lazydeltabase = oldlazydeltabase
- destrevlog._deltabothparents = oldamd
-
- def _clone(
- self,
- tr,
- destrevlog,
- addrevisioncb,
- deltareuse,
- forcedeltabothparents,
- sidedata_helpers,
- ):
- """perform the core duty of `revlog.clone` after parameter processing"""
- deltacomputer = deltautil.deltacomputer(destrevlog)
- index = self.index
- for rev in self:
- entry = index[rev]
-
- # Some classes override linkrev to take filtered revs into
- # account. Use raw entry from index.
- flags = entry[0] & 0xFFFF
- linkrev = entry[4]
- p1 = index[entry[5]][7]
- p2 = index[entry[6]][7]
- node = entry[7]
-
- # (Possibly) reuse the delta from the revlog if allowed and
- # the revlog chunk is a delta.
- cachedelta = None
- rawtext = None
- if deltareuse == self.DELTAREUSEFULLADD:
- text = self._revisiondata(rev)
- sidedata = self.sidedata(rev)
-
- if sidedata_helpers is not None:
- (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
- self, sidedata_helpers, sidedata, rev
- )
- flags = flags | new_flags[0] & ~new_flags[1]
-
- destrevlog.addrevision(
- text,
- tr,
- linkrev,
- p1,
- p2,
- cachedelta=cachedelta,
- node=node,
- flags=flags,
- deltacomputer=deltacomputer,
- sidedata=sidedata,
- )
- else:
- if destrevlog._lazydelta:
- dp = self.deltaparent(rev)
- if dp != nullrev:
- cachedelta = (dp, bytes(self._chunk(rev)))
-
- sidedata = None
- if not cachedelta:
- rawtext = self._revisiondata(rev)
- sidedata = self.sidedata(rev)
- if sidedata is None:
- sidedata = self.sidedata(rev)
-
- if sidedata_helpers is not None:
- (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
- self, sidedata_helpers, sidedata, rev
- )
- flags = flags | new_flags[0] & ~new_flags[1]
-
- with destrevlog._writing(tr):
- destrevlog._addrevision(
- node,
- rawtext,
- tr,
- linkrev,
- p1,
- p2,
- flags,
- cachedelta,
- deltacomputer=deltacomputer,
- sidedata=sidedata,
- )
-
- if addrevisioncb:
- addrevisioncb(self, rev, node)
-
- def censorrevision(self, tr, censornode, tombstone=b''):
- if self._format_version == REVLOGV0:
- raise error.RevlogError(
- _(b'cannot censor with version %d revlogs')
- % self._format_version
- )
-
- censorrev = self.rev(censornode)
- tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
- if len(tombstone) > self.rawsize(censorrev):
- raise error.Abort(
- _(b'censor tombstone must be no longer than censored data')
- )
+ rl.opener.rename(newrl._indexfile, rl._indexfile)
+ if not rl._inline:
+ rl.opener.rename(newrl._datafile, rl._datafile)
- # Rewriting the revlog in place is hard. Our strategy for censoring is
- # to create a new revlog, copy all revisions to it, then replace the
- # revlogs on transaction close.
- #
- # This is a bit dangerous. We could easily have a mismatch of state.
- newrl = revlog(
- self.opener,
- target=self.target,
- radix=self.radix,
- postfix=b'tmpcensored',
- censorable=True,
- )
- newrl._format_version = self._format_version
- newrl._format_flags = self._format_flags
- newrl._generaldelta = self._generaldelta
- newrl._parse_index = self._parse_index
-
- for rev in self.revs():
- node = self.node(rev)
- p1, p2 = self.parents(node)
-
- if rev == censorrev:
- newrl.addrawrevision(
- tombstone,
- tr,
- self.linkrev(censorrev),
- p1,
- p2,
- censornode,
- REVIDX_ISCENSORED,
- )
-
- if newrl.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'censored revision stored as delta; '
- b'cannot censor'
- ),
- hint=_(
- b'censoring of revlogs is not '
- b'fully implemented; please report '
- b'this bug'
- ),
- )
- continue
-
- if self.iscensored(rev):
- if self.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'cannot censor due to censored '
- b'revision having delta stored'
- )
- )
- rawtext = self._chunk(rev)
- else:
- rawtext = self.rawdata(rev)
-
- newrl.addrawrevision(
- rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
- )
-
- tr.addbackup(self._indexfile, location=b'store')
- if not self._inline:
- tr.addbackup(self._datafile, location=b'store')
-
- self.opener.rename(newrl._indexfile, self._indexfile)
- if not self._inline:
- self.opener.rename(newrl._datafile, self._datafile)
-
- self.clearcaches()
- self._loadindex()
-
- def verifyintegrity(self, state):
- """Verifies the integrity of the revlog.
-
- Yields ``revlogproblem`` instances describing problems that are
- found.
- """
- dd, di = self.checksize()
- if dd:
- yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
- if di:
- yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
-
- version = self._format_version
-
- # The verifier tells us what version revlog we should be.
- if version != state[b'expectedversion']:
- yield revlogproblem(
- warning=_(b"warning: '%s' uses revlog format %d; expected %d")
- % (self.display_id, version, state[b'expectedversion'])
- )
-
- state[b'skipread'] = set()
- state[b'safe_renamed'] = set()
-
- for rev in self:
- node = self.node(rev)
-
- # Verify contents. 4 cases to care about:
- #
- # common: the most common case
- # rename: with a rename
- # meta: file content starts with b'\1\n', the metadata
- # header defined in filelog.py, but without a rename
- # ext: content stored externally
- #
- # More formally, their differences are shown below:
- #
- # | common | rename | meta | ext
- # -------------------------------------------------------
- # flags() | 0 | 0 | 0 | not 0
- # renamed() | False | True | False | ?
- # rawtext[0:2]=='\1\n'| False | True | True | ?
- #
- # "rawtext" means the raw text stored in revlog data, which
- # could be retrieved by "rawdata(rev)". "text"
- # mentioned below is "revision(rev)".
- #
- # There are 3 different lengths stored physically:
- # 1. L1: rawsize, stored in revlog index
- # 2. L2: len(rawtext), stored in revlog data
- # 3. L3: len(text), stored in revlog data if flags==0, or
- # possibly somewhere else if flags!=0
- #
- # L1 should be equal to L2. L3 could be different from them.
- # "text" may or may not affect commit hash depending on flag
- # processors (see flagutil.addflagprocessor).
- #
- # | common | rename | meta | ext
- # -------------------------------------------------
- # rawsize() | L1 | L1 | L1 | L1
- # size() | L1 | L2-LM | L1(*) | L1 (?)
- # len(rawtext) | L2 | L2 | L2 | L2
- # len(text) | L2 | L2 | L2 | L3
- # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
- #
- # LM: length of metadata, depending on rawtext
- # (*): not ideal, see comment in filelog.size
- # (?): could be "- len(meta)" if the resolved content has
- # rename metadata
- #
- # Checks needed to be done:
- # 1. length check: L1 == L2, in all cases.
- # 2. hash check: depending on flag processor, we may need to
- # use either "text" (external), or "rawtext" (in revlog).
-
- try:
- skipflags = state.get(b'skipflags', 0)
- if skipflags:
- skipflags &= self.flags(rev)
-
- _verify_revision(self, skipflags, state, node)
-
- l1 = self.rawsize(rev)
- l2 = len(self.rawdata(node))
-
- if l1 != l2:
- yield revlogproblem(
- error=_(b'unpacked size is %d, %d expected') % (l2, l1),
- node=node,
- )
-
- except error.CensoredNodeError:
- if state[b'erroroncensored']:
- yield revlogproblem(
- error=_(b'censored file data'), node=node
- )
- state[b'skipread'].add(node)
- except Exception as e:
- yield revlogproblem(
- error=_(b'unpacking %s: %s')
- % (short(node), stringutil.forcebytestr(e)),
- node=node,
- )
- state[b'skipread'].add(node)
-
- def storageinfo(
- self,
- exclusivefiles=False,
- sharedfiles=False,
- revisionscount=False,
- trackedsize=False,
- storedsize=False,
- ):
- d = {}
-
- if exclusivefiles:
- d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
- if not self._inline:
- d[b'exclusivefiles'].append((self.opener, self._datafile))
-
- if sharedfiles:
- d[b'sharedfiles'] = []
-
- if revisionscount:
- d[b'revisionscount'] = len(self)
-
- if trackedsize:
- d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
-
- if storedsize:
- d[b'storedsize'] = sum(
- self.opener.stat(path).st_size for path in self.files()
- )
-
- return d
-
- def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
- if not self.hassidedata:
- return
- # revlog formats with sidedata support do not support inline
- assert not self._inline
- if not helpers[1] and not helpers[2]:
- # Nothing to generate or remove
- return
-
- new_entries = []
- # append the new sidedata
- with self._writing(transaction):
- ifh, dfh, sdfh = self._writinghandles
- dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
-
- current_offset = sdfh.tell()
- for rev in range(startrev, endrev + 1):
- entry = self.index[rev]
- new_sidedata, flags = sidedatautil.run_sidedata_helpers(
- store=self,
- sidedata_helpers=helpers,
- sidedata={},
- rev=rev,
- )
-
- serialized_sidedata = sidedatautil.serialize_sidedata(
- new_sidedata
- )
-
- sidedata_compression_mode = COMP_MODE_INLINE
- if serialized_sidedata and self.hassidedata:
- sidedata_compression_mode = COMP_MODE_PLAIN
- h, comp_sidedata = self.compress(serialized_sidedata)
- if (
- h != b'u'
- and comp_sidedata[0] != b'\0'
- and len(comp_sidedata) < len(serialized_sidedata)
- ):
- assert not h
- if (
- comp_sidedata[0]
- == self._docket.default_compression_header
- ):
- sidedata_compression_mode = COMP_MODE_DEFAULT
- serialized_sidedata = comp_sidedata
- else:
- sidedata_compression_mode = COMP_MODE_INLINE
- serialized_sidedata = comp_sidedata
- if entry[8] != 0 or entry[9] != 0:
- # rewriting entries that already have sidedata is not
- # supported yet, because it introduces garbage data in the
- # revlog.
- msg = b"rewriting existing sidedata is not supported yet"
- raise error.Abort(msg)
-
- # Apply (potential) flags to add and to remove after running
- # the sidedata helpers
- new_offset_flags = entry[0] | flags[0] & ~flags[1]
- entry_update = (
- current_offset,
- len(serialized_sidedata),
- new_offset_flags,
- sidedata_compression_mode,
- )
-
- # the sidedata computation might have moved the file cursors around
- sdfh.seek(current_offset, os.SEEK_SET)
- sdfh.write(serialized_sidedata)
- new_entries.append(entry_update)
- current_offset += len(serialized_sidedata)
- self._docket.sidedata_end = sdfh.tell()
-
- # rewrite the new index entries
- ifh.seek(startrev * self.index.entry_size)
- for i, e in enumerate(new_entries):
- rev = startrev + i
- self.index.replace_sidedata_info(rev, *e)
- packed = self.index.entry_binary(rev)
- if rev == 0 and self._docket is None:
- header = self._format_flags | self._format_version
- header = self.index.pack_header(header)
- packed = header + packed
- ifh.write(packed)
+ rl.clearcaches()
+ rl._loadindex()
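
The strategy the new module implements is the one spelled out in the comments
above: censoring never patches a revlog in place; it builds a complete
temporary copy in which the offending revision is replaced by a full-text
tombstone, then swaps the new files in when the transaction closes. As a
rough analogy only (a standalone toy, not Mercurial's API; the names
censor_store and TOMBSTONE are invented for illustration), the same
copy-then-swap idea looks like this:

    import os
    import tempfile

    TOMBSTONE = 'censored'

    def censor_store(path, censored_key):
        """Rewrite a simple 'key<TAB>value' text store with one value censored.

        Mirrors the revlog strategy: write a complete temporary copy with the
        offending entry replaced by a tombstone, then atomically rename it
        over the original instead of patching the file in place.
        """
        dirname = os.path.dirname(path) or '.'
        fd, tmppath = tempfile.mkstemp(dir=dirname)
        with os.fdopen(fd, 'w') as new, open(path) as old:
            for line in old:
                key, _sep, value = line.rstrip('\n').partition('\t')
                if key == censored_key:
                    value = TOMBSTONE  # replace the content, keep the entry
                new.write('%s\t%s\n' % (key, value))
        os.replace(tmppath, path)  # swap in only after the full copy succeeded

The property the revlog code relies on is the same one the toy keeps: the
original files are untouched until a fully written replacement exists, so an
interrupted censor leaves the store readable.
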
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -80,6 +80,7 @@
util as interfaceutil,
)
from .revlogutils import (
+ censor,
deltas as deltautil,
docket as docketutil,
flagutil,
@@ -3232,88 +3233,15 @@
_(b'cannot censor with version %d revlogs')
% self._format_version
)
-
- censorrev = self.rev(censornode)
- tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
- if len(tombstone) > self.rawsize(censorrev):
- raise error.Abort(
- _(b'censor tombstone must be no longer than censored data')
+ elif self._format_version == REVLOGV1:
+ censor.v1_censor(self, tr, censornode, tombstone)
+ else:
+ # revlog v2
+ raise error.RevlogError(
+ _(b'cannot censor with version %d revlogs')
+ % self._format_version
)
- # Rewriting the revlog in place is hard. Our strategy for censoring is
- # to create a new revlog, copy all revisions to it, then replace the
- # revlogs on transaction close.
- #
- # This is a bit dangerous. We could easily have a mismatch of state.
- newrl = revlog(
- self.opener,
- target=self.target,
- radix=self.radix,
- postfix=b'tmpcensored',
- censorable=True,
- )
- newrl._format_version = self._format_version
- newrl._format_flags = self._format_flags
- newrl._generaldelta = self._generaldelta
- newrl._parse_index = self._parse_index
-
- for rev in self.revs():
- node = self.node(rev)
- p1, p2 = self.parents(node)
-
- if rev == censorrev:
- newrl.addrawrevision(
- tombstone,
- tr,
- self.linkrev(censorrev),
- p1,
- p2,
- censornode,
- REVIDX_ISCENSORED,
- )
-
- if newrl.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'censored revision stored as delta; '
- b'cannot censor'
- ),
- hint=_(
- b'censoring of revlogs is not '
- b'fully implemented; please report '
- b'this bug'
- ),
- )
- continue
-
- if self.iscensored(rev):
- if self.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'cannot censor due to censored '
- b'revision having delta stored'
- )
- )
- rawtext = self._chunk(rev)
- else:
- rawtext = self.rawdata(rev)
-
- newrl.addrawrevision(
- rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
- )
-
- tr.addbackup(self._indexfile, location=b'store')
- if not self._inline:
- tr.addbackup(self._datafile, location=b'store')
-
- self.opener.rename(newrl._indexfile, self._indexfile)
- if not self._inline:
- self.opener.rename(newrl._datafile, self._datafile)
-
- self.clearcaches()
- self._loadindex()
-
def verifyintegrity(self, state):
"""Verifies the integrity of the revlog.
To: marmoute, indygreg, #hg-reviewers, Alphare
Cc: mercurial-patches