D10934: dirstate: split dirstatemap in its own file
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Fri Jul 2 15:18:35 UTC 2021
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
The dirstate file is large enough, and the dirstatemap logic is already quite
self-contained, so it can be split into its own file.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10934
AFFECTED FILES
mercurial/dirstate.py
mercurial/dirstatemap.py
tests/fakedirstatewritetime.py
CHANGE DETAILS
diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -10,6 +10,7 @@
from mercurial import (
context,
dirstate,
+ dirstatemap as dirstatemapmod,
extensions,
policy,
registrar,
@@ -66,11 +67,11 @@
if rustmod is not None:
# The Rust implementation does not use public parse/pack dirstate
# to prevent conversion round-trips
- orig_dirstatemap_write = dirstate.dirstatemap.write
+ orig_dirstatemap_write = dirstatemapmod.dirstatemap.write
wrapper = lambda self, st, now: orig_dirstatemap_write(
self, st, fakenow
)
- dirstate.dirstatemap.write = wrapper
+ dirstatemapmod.dirstatemap.write = wrapper
orig_dirstate_getfsnow = dirstate._getfsnow
wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
@@ -86,7 +87,7 @@
orig_module.pack_dirstate = orig_pack_dirstate
dirstate._getfsnow = orig_dirstate_getfsnow
if rustmod is not None:
- dirstate.dirstatemap.write = orig_dirstatemap_write
+ dirstatemapmod.dirstatemap.write = orig_dirstatemap_write
def _poststatusfixup(orig, workingctx, status, fixup):
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
new file mode 100644
--- /dev/null
+++ b/mercurial/dirstatemap.py
@@ -0,0 +1,610 @@
+# dirstatemap.py
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import errno
+
+from .i18n import _
+
+from . import (
+ error,
+ pathutil,
+ policy,
+ pycompat,
+ txnutil,
+ util,
+)
+
+parsers = policy.importmod('parsers')
+rustmod = policy.importrust('dirstate')
+
+propertycache = util.propertycache
+
+dirstatetuple = parsers.dirstatetuple
+
+
+# a special value used internally for `size` if the file come from the other parent
+FROM_P2 = -2
+
+# a special value used internally for `size` if the file is modified/merged/added
+NONNORMAL = -1
+
+# a special value used internally for `time` if the time is ambigeous
+AMBIGUOUS_TIME = -1
+
+
+class dirstatemap(object):
+ """Map encapsulating the dirstate's contents.
+
+ The dirstate contains the following state:
+
+ - `identity` is the identity of the dirstate file, which can be used to
+ detect when changes have occurred to the dirstate file.
+
+ - `parents` is a pair containing the parents of the working copy. The
+ parents are updated by calling `setparents`.
+
+ - the state map maps filenames to tuples of (state, mode, size, mtime),
+ where state is a single character representing 'normal', 'added',
+ 'removed', or 'merged'. It is read by treating the dirstate as a
+ dict. File state is updated by calling the `addfile`, `removefile` and
+ `dropfile` methods.
+
+ - `copymap` maps destination filenames to their source filename.
+
+ The dirstate also provides the following views onto the state:
+
+ - `nonnormalset` is a set of the filenames that have state other
+ than 'normal', or are normal but have an mtime of -1 ('normallookup').
+
+ - `otherparentset` is a set of the filenames that are marked as coming
+ from the second parent when the dirstate is currently being merged.
+
+ - `filefoldmap` is a dict mapping normalized filenames to the denormalized
+ form that they appear as in the dirstate.
+
+ - `dirfoldmap` is a dict mapping normalized directory names to the
+ denormalized form that they appear as in the dirstate.
+ """
+
+ def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
+ self._ui = ui
+ self._opener = opener
+ self._root = root
+ self._filename = b'dirstate'
+ self._nodelen = 20
+ self._nodeconstants = nodeconstants
+ assert (
+ not use_dirstate_v2
+ ), "should have detected unsupported requirement"
+
+ self._parents = None
+ self._dirtyparents = False
+
+ # for consistent view between _pl() and _read() invocations
+ self._pendingmode = None
+
+ @propertycache
+ def _map(self):
+ self._map = {}
+ self.read()
+ return self._map
+
+ @propertycache
+ def copymap(self):
+ self.copymap = {}
+ self._map
+ return self.copymap
+
+ def directories(self):
+ # Rust / dirstate-v2 only
+ return []
+
+ def clear(self):
+ self._map.clear()
+ self.copymap.clear()
+ self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
+ util.clearcachedproperty(self, b"_dirs")
+ util.clearcachedproperty(self, b"_alldirs")
+ util.clearcachedproperty(self, b"filefoldmap")
+ util.clearcachedproperty(self, b"dirfoldmap")
+ util.clearcachedproperty(self, b"nonnormalset")
+ util.clearcachedproperty(self, b"otherparentset")
+
+ def items(self):
+ return pycompat.iteritems(self._map)
+
+ # forward for python2,3 compat
+ iteritems = items
+
+ def __len__(self):
+ return len(self._map)
+
+ def __iter__(self):
+ return iter(self._map)
+
+ def get(self, key, default=None):
+ return self._map.get(key, default)
+
+ def __contains__(self, key):
+ return key in self._map
+
+ def __getitem__(self, key):
+ return self._map[key]
+
+ def keys(self):
+ return self._map.keys()
+
+ def preload(self):
+ """Loads the underlying data, if it's not already loaded"""
+ self._map
+
+ def addfile(self, f, oldstate, state, mode, size, mtime):
+ """Add a tracked file to the dirstate."""
+ if oldstate in b"?r" and "_dirs" in self.__dict__:
+ self._dirs.addpath(f)
+ if oldstate == b"?" and "_alldirs" in self.__dict__:
+ self._alldirs.addpath(f)
+ self._map[f] = dirstatetuple(state, mode, size, mtime)
+ if state != b'n' or mtime == AMBIGUOUS_TIME:
+ self.nonnormalset.add(f)
+ if size == FROM_P2:
+ self.otherparentset.add(f)
+
+ def removefile(self, f, oldstate, size):
+ """
+ Mark a file as removed in the dirstate.
+
+ The `size` parameter is used to store sentinel values that indicate
+ the file's previous state. In the future, we should refactor this
+ to be more explicit about what that state is.
+ """
+ if oldstate not in b"?r" and "_dirs" in self.__dict__:
+ self._dirs.delpath(f)
+ if oldstate == b"?" and "_alldirs" in self.__dict__:
+ self._alldirs.addpath(f)
+ if "filefoldmap" in self.__dict__:
+ normed = util.normcase(f)
+ self.filefoldmap.pop(normed, None)
+ self._map[f] = dirstatetuple(b'r', 0, size, 0)
+ self.nonnormalset.add(f)
+
+ def dropfile(self, f, oldstate):
+ """
+ Remove a file from the dirstate. Returns True if the file was
+ previously recorded.
+ """
+ exists = self._map.pop(f, None) is not None
+ if exists:
+ if oldstate != b"r" and "_dirs" in self.__dict__:
+ self._dirs.delpath(f)
+ if "_alldirs" in self.__dict__:
+ self._alldirs.delpath(f)
+ if "filefoldmap" in self.__dict__:
+ normed = util.normcase(f)
+ self.filefoldmap.pop(normed, None)
+ self.nonnormalset.discard(f)
+ return exists
+
+ def clearambiguoustimes(self, files, now):
+ for f in files:
+ e = self.get(f)
+ if e is not None and e[0] == b'n' and e[3] == now:
+ self._map[f] = dirstatetuple(e[0], e[1], e[2], AMBIGUOUS_TIME)
+ self.nonnormalset.add(f)
+
+ def nonnormalentries(self):
+ '''Compute the nonnormal dirstate entries from the dmap'''
+ try:
+ return parsers.nonnormalotherparententries(self._map)
+ except AttributeError:
+ nonnorm = set()
+ otherparent = set()
+ for fname, e in pycompat.iteritems(self._map):
+ if e[0] != b'n' or e[3] == AMBIGUOUS_TIME:
+ nonnorm.add(fname)
+ if e[0] == b'n' and e[2] == FROM_P2:
+ otherparent.add(fname)
+ return nonnorm, otherparent
+
+ @propertycache
+ def filefoldmap(self):
+ """Returns a dictionary mapping normalized case paths to their
+ non-normalized versions.
+ """
+ try:
+ makefilefoldmap = parsers.make_file_foldmap
+ except AttributeError:
+ pass
+ else:
+ return makefilefoldmap(
+ self._map, util.normcasespec, util.normcasefallback
+ )
+
+ f = {}
+ normcase = util.normcase
+ for name, s in pycompat.iteritems(self._map):
+ if s[0] != b'r':
+ f[normcase(name)] = name
+ f[b'.'] = b'.' # prevents useless util.fspath() invocation
+ return f
+
+ def hastrackeddir(self, d):
+ """
+ Returns True if the dirstate contains a tracked (not removed) file
+ in this directory.
+ """
+ return d in self._dirs
+
+ def hasdir(self, d):
+ """
+ Returns True if the dirstate contains a file (tracked or removed)
+ in this directory.
+ """
+ return d in self._alldirs
+
+ @propertycache
+ def _dirs(self):
+ return pathutil.dirs(self._map, b'r')
+
+ @propertycache
+ def _alldirs(self):
+ return pathutil.dirs(self._map)
+
+ def _opendirstatefile(self):
+ fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
+ if self._pendingmode is not None and self._pendingmode != mode:
+ fp.close()
+ raise error.Abort(
+ _(b'working directory state may be changed parallelly')
+ )
+ self._pendingmode = mode
+ return fp
+
+ def parents(self):
+ if not self._parents:
+ try:
+ fp = self._opendirstatefile()
+ st = fp.read(2 * self._nodelen)
+ fp.close()
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ # File doesn't exist, so the current state is empty
+ st = b''
+
+ l = len(st)
+ if l == self._nodelen * 2:
+ self._parents = (
+ st[: self._nodelen],
+ st[self._nodelen : 2 * self._nodelen],
+ )
+ elif l == 0:
+ self._parents = (
+ self._nodeconstants.nullid,
+ self._nodeconstants.nullid,
+ )
+ else:
+ raise error.Abort(
+ _(b'working directory state appears damaged!')
+ )
+
+ return self._parents
+
+ def setparents(self, p1, p2):
+ self._parents = (p1, p2)
+ self._dirtyparents = True
+
+ def read(self):
+ # ignore HG_PENDING because identity is used only for writing
+ self.identity = util.filestat.frompath(
+ self._opener.join(self._filename)
+ )
+
+ try:
+ fp = self._opendirstatefile()
+ try:
+ st = fp.read()
+ finally:
+ fp.close()
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ return
+ if not st:
+ return
+
+ if util.safehasattr(parsers, b'dict_new_presized'):
+ # Make an estimate of the number of files in the dirstate based on
+ # its size. This trades wasting some memory for avoiding costly
+ # resizes. Each entry have a prefix of 17 bytes followed by one or
+ # two path names. Studies on various large-scale real-world repositories
+ # found 54 bytes a reasonable upper limit for the average path names.
+ # Copy entries are ignored for the sake of this estimate.
+ self._map = parsers.dict_new_presized(len(st) // 71)
+
+ # Python's garbage collector triggers a GC each time a certain number
+ # of container objects (the number being defined by
+ # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
+ # for each file in the dirstate. The C version then immediately marks
+ # them as not to be tracked by the collector. However, this has no
+ # effect on when GCs are triggered, only on what objects the GC looks
+ # into. This means that O(number of files) GCs are unavoidable.
+ # Depending on when in the process's lifetime the dirstate is parsed,
+ # this can get very expensive. As a workaround, disable GC while
+ # parsing the dirstate.
+ #
+ # (we cannot decorate the function directly since it is in a C module)
+ parse_dirstate = util.nogc(parsers.parse_dirstate)
+ p = parse_dirstate(self._map, self.copymap, st)
+ if not self._dirtyparents:
+ self.setparents(*p)
+
+ # Avoid excess attribute lookups by fast pathing certain checks
+ self.__contains__ = self._map.__contains__
+ self.__getitem__ = self._map.__getitem__
+ self.get = self._map.get
+
+ def write(self, st, now):
+ st.write(
+ parsers.pack_dirstate(self._map, self.copymap, self.parents(), now)
+ )
+ st.close()
+ self._dirtyparents = False
+ self.nonnormalset, self.otherparentset = self.nonnormalentries()
+
+ @propertycache
+ def nonnormalset(self):
+ nonnorm, otherparents = self.nonnormalentries()
+ self.otherparentset = otherparents
+ return nonnorm
+
+ @propertycache
+ def otherparentset(self):
+ nonnorm, otherparents = self.nonnormalentries()
+ self.nonnormalset = nonnorm
+ return otherparents
+
+ def non_normal_or_other_parent_paths(self):
+ return self.nonnormalset.union(self.otherparentset)
+
+ @propertycache
+ def identity(self):
+ self._map
+ return self.identity
+
+ @propertycache
+ def dirfoldmap(self):
+ f = {}
+ normcase = util.normcase
+ for name in self._dirs:
+ f[normcase(name)] = name
+ return f
+
+
+if rustmod is not None:
+
+ class dirstatemap(object):
+ def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
+ self._use_dirstate_v2 = use_dirstate_v2
+ self._nodeconstants = nodeconstants
+ self._ui = ui
+ self._opener = opener
+ self._root = root
+ self._filename = b'dirstate'
+ self._nodelen = 20 # Also update Rust code when changing this!
+ self._parents = None
+ self._dirtyparents = False
+
+ # for consistent view between _pl() and _read() invocations
+ self._pendingmode = None
+
+ self._use_dirstate_tree = self._ui.configbool(
+ b"experimental",
+ b"dirstate-tree.in-memory",
+ False,
+ )
+
+ def addfile(self, *args, **kwargs):
+ return self._rustmap.addfile(*args, **kwargs)
+
+ def removefile(self, *args, **kwargs):
+ return self._rustmap.removefile(*args, **kwargs)
+
+ def dropfile(self, *args, **kwargs):
+ return self._rustmap.dropfile(*args, **kwargs)
+
+ def clearambiguoustimes(self, *args, **kwargs):
+ return self._rustmap.clearambiguoustimes(*args, **kwargs)
+
+ def nonnormalentries(self):
+ return self._rustmap.nonnormalentries()
+
+ def get(self, *args, **kwargs):
+ return self._rustmap.get(*args, **kwargs)
+
+ @property
+ def copymap(self):
+ return self._rustmap.copymap()
+
+ def directories(self):
+ return self._rustmap.directories()
+
+ def preload(self):
+ self._rustmap
+
+ def clear(self):
+ self._rustmap.clear()
+ self.setparents(
+ self._nodeconstants.nullid, self._nodeconstants.nullid
+ )
+ util.clearcachedproperty(self, b"_dirs")
+ util.clearcachedproperty(self, b"_alldirs")
+ util.clearcachedproperty(self, b"dirfoldmap")
+
+ def items(self):
+ return self._rustmap.items()
+
+ def keys(self):
+ return iter(self._rustmap)
+
+ def __contains__(self, key):
+ return key in self._rustmap
+
+ def __getitem__(self, item):
+ return self._rustmap[item]
+
+ def __len__(self):
+ return len(self._rustmap)
+
+ def __iter__(self):
+ return iter(self._rustmap)
+
+ # forward for python2,3 compat
+ iteritems = items
+
+ def _opendirstatefile(self):
+ fp, mode = txnutil.trypending(
+ self._root, self._opener, self._filename
+ )
+ if self._pendingmode is not None and self._pendingmode != mode:
+ fp.close()
+ raise error.Abort(
+ _(b'working directory state may be changed parallelly')
+ )
+ self._pendingmode = mode
+ return fp
+
+ def setparents(self, p1, p2):
+ self._parents = (p1, p2)
+ self._dirtyparents = True
+
+ def parents(self):
+ if not self._parents:
+ if self._use_dirstate_v2:
+ offset = len(rustmod.V2_FORMAT_MARKER)
+ else:
+ offset = 0
+ read_len = offset + self._nodelen * 2
+ try:
+ fp = self._opendirstatefile()
+ st = fp.read(read_len)
+ fp.close()
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ # File doesn't exist, so the current state is empty
+ st = b''
+
+ l = len(st)
+ if l == read_len:
+ st = st[offset:]
+ self._parents = (
+ st[: self._nodelen],
+ st[self._nodelen : 2 * self._nodelen],
+ )
+ elif l == 0:
+ self._parents = (
+ self._nodeconstants.nullid,
+ self._nodeconstants.nullid,
+ )
+ else:
+ raise error.Abort(
+ _(b'working directory state appears damaged!')
+ )
+
+ return self._parents
+
+ @propertycache
+ def _rustmap(self):
+ """
+ Fills the Dirstatemap when called.
+ """
+ # ignore HG_PENDING because identity is used only for writing
+ self.identity = util.filestat.frompath(
+ self._opener.join(self._filename)
+ )
+
+ try:
+ fp = self._opendirstatefile()
+ try:
+ st = fp.read()
+ finally:
+ fp.close()
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ st = b''
+
+ self._rustmap, parents = rustmod.DirstateMap.new(
+ self._use_dirstate_tree, self._use_dirstate_v2, st
+ )
+
+ if parents and not self._dirtyparents:
+ self.setparents(*parents)
+
+ self.__contains__ = self._rustmap.__contains__
+ self.__getitem__ = self._rustmap.__getitem__
+ self.get = self._rustmap.get
+ return self._rustmap
+
+ def write(self, st, now):
+ parents = self.parents()
+ packed = self._rustmap.write(
+ self._use_dirstate_v2, parents[0], parents[1], now
+ )
+ st.write(packed)
+ st.close()
+ self._dirtyparents = False
+
+ @propertycache
+ def filefoldmap(self):
+ """Returns a dictionary mapping normalized case paths to their
+ non-normalized versions.
+ """
+ return self._rustmap.filefoldmapasdict()
+
+ def hastrackeddir(self, d):
+ self._dirs # Trigger Python's propertycache
+ return self._rustmap.hastrackeddir(d)
+
+ def hasdir(self, d):
+ self._dirs # Trigger Python's propertycache
+ return self._rustmap.hasdir(d)
+
+ @propertycache
+ def _dirs(self):
+ return self._rustmap.getdirs()
+
+ @propertycache
+ def _alldirs(self):
+ return self._rustmap.getalldirs()
+
+ @propertycache
+ def identity(self):
+ self._rustmap
+ return self.identity
+
+ @property
+ def nonnormalset(self):
+ nonnorm = self._rustmap.non_normal_entries()
+ return nonnorm
+
+ @propertycache
+ def otherparentset(self):
+ otherparents = self._rustmap.other_parent_entries()
+ return otherparents
+
+ def non_normal_or_other_parent_paths(self):
+ return self._rustmap.non_normal_or_other_parent_paths()
+
+ @propertycache
+ def dirfoldmap(self):
+ f = {}
+ normcase = util.normcase
+ for name in self._dirs:
+ f[normcase(name)] = name
+ return f
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -19,6 +19,7 @@
from hgdemandimport import tracing
from . import (
+ dirstatemap,
encoding,
error,
match as matchmod,
@@ -27,7 +28,6 @@
pycompat,
scmutil,
sparse,
- txnutil,
util,
)
@@ -49,13 +49,13 @@
# a special value used internally for `size` if the file come from the other parent
-FROM_P2 = -2
+FROM_P2 = dirstatemap.FROM_P2
# a special value used internally for `size` if the file is modified/merged/added
-NONNORMAL = -1
+NONNORMAL = dirstatemap.NONNORMAL
# a special value used internally for `time` if the time is ambigeous
-AMBIGUOUS_TIME = -1
+AMBIGUOUS_TIME = dirstatemap.AMBIGUOUS_TIME
class repocache(filecache):
@@ -119,7 +119,7 @@
self._plchangecallbacks = {}
self._origpl = None
self._updatedfiles = set()
- self._mapcls = dirstatemap
+ self._mapcls = dirstatemap.dirstatemap
# Access and cache cwd early, so we don't access it for the first time
# after a working-copy update caused it to not exist (accessing it then
# raises an exception).
@@ -1450,577 +1450,3 @@
def clearbackup(self, tr, backupname):
'''Clear backup file'''
self._opener.unlink(backupname)
-
-
-class dirstatemap(object):
- """Map encapsulating the dirstate's contents.
-
- The dirstate contains the following state:
-
- - `identity` is the identity of the dirstate file, which can be used to
- detect when changes have occurred to the dirstate file.
-
- - `parents` is a pair containing the parents of the working copy. The
- parents are updated by calling `setparents`.
-
- - the state map maps filenames to tuples of (state, mode, size, mtime),
- where state is a single character representing 'normal', 'added',
- 'removed', or 'merged'. It is read by treating the dirstate as a
- dict. File state is updated by calling the `addfile`, `removefile` and
- `dropfile` methods.
-
- - `copymap` maps destination filenames to their source filename.
-
- The dirstate also provides the following views onto the state:
-
- - `nonnormalset` is a set of the filenames that have state other
- than 'normal', or are normal but have an mtime of -1 ('normallookup').
-
- - `otherparentset` is a set of the filenames that are marked as coming
- from the second parent when the dirstate is currently being merged.
-
- - `filefoldmap` is a dict mapping normalized filenames to the denormalized
- form that they appear as in the dirstate.
-
- - `dirfoldmap` is a dict mapping normalized directory names to the
- denormalized form that they appear as in the dirstate.
- """
-
- def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
- self._ui = ui
- self._opener = opener
- self._root = root
- self._filename = b'dirstate'
- self._nodelen = 20
- self._nodeconstants = nodeconstants
- assert (
- not use_dirstate_v2
- ), "should have detected unsupported requirement"
-
- self._parents = None
- self._dirtyparents = False
-
- # for consistent view between _pl() and _read() invocations
- self._pendingmode = None
-
- @propertycache
- def _map(self):
- self._map = {}
- self.read()
- return self._map
-
- @propertycache
- def copymap(self):
- self.copymap = {}
- self._map
- return self.copymap
-
- def directories(self):
- # Rust / dirstate-v2 only
- return []
-
- def clear(self):
- self._map.clear()
- self.copymap.clear()
- self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
- util.clearcachedproperty(self, b"_dirs")
- util.clearcachedproperty(self, b"_alldirs")
- util.clearcachedproperty(self, b"filefoldmap")
- util.clearcachedproperty(self, b"dirfoldmap")
- util.clearcachedproperty(self, b"nonnormalset")
- util.clearcachedproperty(self, b"otherparentset")
-
- def items(self):
- return pycompat.iteritems(self._map)
-
- # forward for python2,3 compat
- iteritems = items
-
- def __len__(self):
- return len(self._map)
-
- def __iter__(self):
- return iter(self._map)
-
- def get(self, key, default=None):
- return self._map.get(key, default)
-
- def __contains__(self, key):
- return key in self._map
-
- def __getitem__(self, key):
- return self._map[key]
-
- def keys(self):
- return self._map.keys()
-
- def preload(self):
- """Loads the underlying data, if it's not already loaded"""
- self._map
-
- def addfile(self, f, oldstate, state, mode, size, mtime):
- """Add a tracked file to the dirstate."""
- if oldstate in b"?r" and "_dirs" in self.__dict__:
- self._dirs.addpath(f)
- if oldstate == b"?" and "_alldirs" in self.__dict__:
- self._alldirs.addpath(f)
- self._map[f] = dirstatetuple(state, mode, size, mtime)
- if state != b'n' or mtime == AMBIGUOUS_TIME:
- self.nonnormalset.add(f)
- if size == FROM_P2:
- self.otherparentset.add(f)
-
- def removefile(self, f, oldstate, size):
- """
- Mark a file as removed in the dirstate.
-
- The `size` parameter is used to store sentinel values that indicate
- the file's previous state. In the future, we should refactor this
- to be more explicit about what that state is.
- """
- if oldstate not in b"?r" and "_dirs" in self.__dict__:
- self._dirs.delpath(f)
- if oldstate == b"?" and "_alldirs" in self.__dict__:
- self._alldirs.addpath(f)
- if "filefoldmap" in self.__dict__:
- normed = util.normcase(f)
- self.filefoldmap.pop(normed, None)
- self._map[f] = dirstatetuple(b'r', 0, size, 0)
- self.nonnormalset.add(f)
-
- def dropfile(self, f, oldstate):
- """
- Remove a file from the dirstate. Returns True if the file was
- previously recorded.
- """
- exists = self._map.pop(f, None) is not None
- if exists:
- if oldstate != b"r" and "_dirs" in self.__dict__:
- self._dirs.delpath(f)
- if "_alldirs" in self.__dict__:
- self._alldirs.delpath(f)
- if "filefoldmap" in self.__dict__:
- normed = util.normcase(f)
- self.filefoldmap.pop(normed, None)
- self.nonnormalset.discard(f)
- return exists
-
- def clearambiguoustimes(self, files, now):
- for f in files:
- e = self.get(f)
- if e is not None and e[0] == b'n' and e[3] == now:
- self._map[f] = dirstatetuple(e[0], e[1], e[2], AMBIGUOUS_TIME)
- self.nonnormalset.add(f)
-
- def nonnormalentries(self):
- '''Compute the nonnormal dirstate entries from the dmap'''
- try:
- return parsers.nonnormalotherparententries(self._map)
- except AttributeError:
- nonnorm = set()
- otherparent = set()
- for fname, e in pycompat.iteritems(self._map):
- if e[0] != b'n' or e[3] == AMBIGUOUS_TIME:
- nonnorm.add(fname)
- if e[0] == b'n' and e[2] == FROM_P2:
- otherparent.add(fname)
- return nonnorm, otherparent
-
- @propertycache
- def filefoldmap(self):
- """Returns a dictionary mapping normalized case paths to their
- non-normalized versions.
- """
- try:
- makefilefoldmap = parsers.make_file_foldmap
- except AttributeError:
- pass
- else:
- return makefilefoldmap(
- self._map, util.normcasespec, util.normcasefallback
- )
-
- f = {}
- normcase = util.normcase
- for name, s in pycompat.iteritems(self._map):
- if s[0] != b'r':
- f[normcase(name)] = name
- f[b'.'] = b'.' # prevents useless util.fspath() invocation
- return f
-
- def hastrackeddir(self, d):
- """
- Returns True if the dirstate contains a tracked (not removed) file
- in this directory.
- """
- return d in self._dirs
-
- def hasdir(self, d):
- """
- Returns True if the dirstate contains a file (tracked or removed)
- in this directory.
- """
- return d in self._alldirs
-
- @propertycache
- def _dirs(self):
- return pathutil.dirs(self._map, b'r')
-
- @propertycache
- def _alldirs(self):
- return pathutil.dirs(self._map)
-
- def _opendirstatefile(self):
- fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
- if self._pendingmode is not None and self._pendingmode != mode:
- fp.close()
- raise error.Abort(
- _(b'working directory state may be changed parallelly')
- )
- self._pendingmode = mode
- return fp
-
- def parents(self):
- if not self._parents:
- try:
- fp = self._opendirstatefile()
- st = fp.read(2 * self._nodelen)
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- # File doesn't exist, so the current state is empty
- st = b''
-
- l = len(st)
- if l == self._nodelen * 2:
- self._parents = (
- st[: self._nodelen],
- st[self._nodelen : 2 * self._nodelen],
- )
- elif l == 0:
- self._parents = (
- self._nodeconstants.nullid,
- self._nodeconstants.nullid,
- )
- else:
- raise error.Abort(
- _(b'working directory state appears damaged!')
- )
-
- return self._parents
-
- def setparents(self, p1, p2):
- self._parents = (p1, p2)
- self._dirtyparents = True
-
- def read(self):
- # ignore HG_PENDING because identity is used only for writing
- self.identity = util.filestat.frompath(
- self._opener.join(self._filename)
- )
-
- try:
- fp = self._opendirstatefile()
- try:
- st = fp.read()
- finally:
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- return
- if not st:
- return
-
- if util.safehasattr(parsers, b'dict_new_presized'):
- # Make an estimate of the number of files in the dirstate based on
- # its size. This trades wasting some memory for avoiding costly
- # resizes. Each entry have a prefix of 17 bytes followed by one or
- # two path names. Studies on various large-scale real-world repositories
- # found 54 bytes a reasonable upper limit for the average path names.
- # Copy entries are ignored for the sake of this estimate.
- self._map = parsers.dict_new_presized(len(st) // 71)
-
- # Python's garbage collector triggers a GC each time a certain number
- # of container objects (the number being defined by
- # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
- # for each file in the dirstate. The C version then immediately marks
- # them as not to be tracked by the collector. However, this has no
- # effect on when GCs are triggered, only on what objects the GC looks
- # into. This means that O(number of files) GCs are unavoidable.
- # Depending on when in the process's lifetime the dirstate is parsed,
- # this can get very expensive. As a workaround, disable GC while
- # parsing the dirstate.
- #
- # (we cannot decorate the function directly since it is in a C module)
- parse_dirstate = util.nogc(parsers.parse_dirstate)
- p = parse_dirstate(self._map, self.copymap, st)
- if not self._dirtyparents:
- self.setparents(*p)
-
- # Avoid excess attribute lookups by fast pathing certain checks
- self.__contains__ = self._map.__contains__
- self.__getitem__ = self._map.__getitem__
- self.get = self._map.get
-
- def write(self, st, now):
- st.write(
- parsers.pack_dirstate(self._map, self.copymap, self.parents(), now)
- )
- st.close()
- self._dirtyparents = False
- self.nonnormalset, self.otherparentset = self.nonnormalentries()
-
- @propertycache
- def nonnormalset(self):
- nonnorm, otherparents = self.nonnormalentries()
- self.otherparentset = otherparents
- return nonnorm
-
- @propertycache
- def otherparentset(self):
- nonnorm, otherparents = self.nonnormalentries()
- self.nonnormalset = nonnorm
- return otherparents
-
- def non_normal_or_other_parent_paths(self):
- return self.nonnormalset.union(self.otherparentset)
-
- @propertycache
- def identity(self):
- self._map
- return self.identity
-
- @propertycache
- def dirfoldmap(self):
- f = {}
- normcase = util.normcase
- for name in self._dirs:
- f[normcase(name)] = name
- return f
-
-
-if rustmod is not None:
-
- class dirstatemap(object):
- def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
- self._use_dirstate_v2 = use_dirstate_v2
- self._nodeconstants = nodeconstants
- self._ui = ui
- self._opener = opener
- self._root = root
- self._filename = b'dirstate'
- self._nodelen = 20 # Also update Rust code when changing this!
- self._parents = None
- self._dirtyparents = False
-
- # for consistent view between _pl() and _read() invocations
- self._pendingmode = None
-
- self._use_dirstate_tree = self._ui.configbool(
- b"experimental",
- b"dirstate-tree.in-memory",
- False,
- )
-
- def addfile(self, *args, **kwargs):
- return self._rustmap.addfile(*args, **kwargs)
-
- def removefile(self, *args, **kwargs):
- return self._rustmap.removefile(*args, **kwargs)
-
- def dropfile(self, *args, **kwargs):
- return self._rustmap.dropfile(*args, **kwargs)
-
- def clearambiguoustimes(self, *args, **kwargs):
- return self._rustmap.clearambiguoustimes(*args, **kwargs)
-
- def nonnormalentries(self):
- return self._rustmap.nonnormalentries()
-
- def get(self, *args, **kwargs):
- return self._rustmap.get(*args, **kwargs)
-
- @property
- def copymap(self):
- return self._rustmap.copymap()
-
- def directories(self):
- return self._rustmap.directories()
-
- def preload(self):
- self._rustmap
-
- def clear(self):
- self._rustmap.clear()
- self.setparents(
- self._nodeconstants.nullid, self._nodeconstants.nullid
- )
- util.clearcachedproperty(self, b"_dirs")
- util.clearcachedproperty(self, b"_alldirs")
- util.clearcachedproperty(self, b"dirfoldmap")
-
- def items(self):
- return self._rustmap.items()
-
- def keys(self):
- return iter(self._rustmap)
-
- def __contains__(self, key):
- return key in self._rustmap
-
- def __getitem__(self, item):
- return self._rustmap[item]
-
- def __len__(self):
- return len(self._rustmap)
-
- def __iter__(self):
- return iter(self._rustmap)
-
- # forward for python2,3 compat
- iteritems = items
-
- def _opendirstatefile(self):
- fp, mode = txnutil.trypending(
- self._root, self._opener, self._filename
- )
- if self._pendingmode is not None and self._pendingmode != mode:
- fp.close()
- raise error.Abort(
- _(b'working directory state may be changed parallelly')
- )
- self._pendingmode = mode
- return fp
-
- def setparents(self, p1, p2):
- self._parents = (p1, p2)
- self._dirtyparents = True
-
- def parents(self):
- if not self._parents:
- if self._use_dirstate_v2:
- offset = len(rustmod.V2_FORMAT_MARKER)
- else:
- offset = 0
- read_len = offset + self._nodelen * 2
- try:
- fp = self._opendirstatefile()
- st = fp.read(read_len)
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- # File doesn't exist, so the current state is empty
- st = b''
-
- l = len(st)
- if l == read_len:
- st = st[offset:]
- self._parents = (
- st[: self._nodelen],
- st[self._nodelen : 2 * self._nodelen],
- )
- elif l == 0:
- self._parents = (
- self._nodeconstants.nullid,
- self._nodeconstants.nullid,
- )
- else:
- raise error.Abort(
- _(b'working directory state appears damaged!')
- )
-
- return self._parents
-
- @propertycache
- def _rustmap(self):
- """
- Fills the Dirstatemap when called.
- """
- # ignore HG_PENDING because identity is used only for writing
- self.identity = util.filestat.frompath(
- self._opener.join(self._filename)
- )
-
- try:
- fp = self._opendirstatefile()
- try:
- st = fp.read()
- finally:
- fp.close()
- except IOError as err:
- if err.errno != errno.ENOENT:
- raise
- st = b''
-
- self._rustmap, parents = rustmod.DirstateMap.new(
- self._use_dirstate_tree, self._use_dirstate_v2, st
- )
-
- if parents and not self._dirtyparents:
- self.setparents(*parents)
-
- self.__contains__ = self._rustmap.__contains__
- self.__getitem__ = self._rustmap.__getitem__
- self.get = self._rustmap.get
- return self._rustmap
-
- def write(self, st, now):
- parents = self.parents()
- packed = self._rustmap.write(
- self._use_dirstate_v2, parents[0], parents[1], now
- )
- st.write(packed)
- st.close()
- self._dirtyparents = False
-
- @propertycache
- def filefoldmap(self):
- """Returns a dictionary mapping normalized case paths to their
- non-normalized versions.
- """
- return self._rustmap.filefoldmapasdict()
-
- def hastrackeddir(self, d):
- self._dirs # Trigger Python's propertycache
- return self._rustmap.hastrackeddir(d)
-
- def hasdir(self, d):
- self._dirs # Trigger Python's propertycache
- return self._rustmap.hasdir(d)
-
- @propertycache
- def _dirs(self):
- return self._rustmap.getdirs()
-
- @propertycache
- def _alldirs(self):
- return self._rustmap.getalldirs()
-
- @propertycache
- def identity(self):
- self._rustmap
- return self.identity
-
- @property
- def nonnormalset(self):
- nonnorm = self._rustmap.non_normal_entries()
- return nonnorm
-
- @propertycache
- def otherparentset(self):
- otherparents = self._rustmap.other_parent_entries()
- return otherparents
-
- def non_normal_or_other_parent_paths(self):
- return self._rustmap.non_normal_or_other_parent_paths()
-
- @propertycache
- def dirfoldmap(self):
- f = {}
- normcase = util.normcase
- for name in self._dirs:
- f[normcase(name)] = name
- return f
To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list