[PATCH 4 of 4] dirstate: speed up case collision checking
Joshua Redstone
joshua.redstone at fb.com
Fri Jun 22 20:49:49 UTC 2012
# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1340048916 25200
# Node ID 4bdb6a9bb7ce10f78a8aa1d2b4d69b00e2949839
# Parent a97598a0ca30329ad513f8e9fe443ec3b0fc1baa
dirstate: speed up case collision checking
scmutil.casecollisionauditor is slow for large repos. Switch to a faster
implementation based on reducing the fraction of all files that are scanned
and caching the results of case conversion lazily rather than precomputing.
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1202,12 +1202,15 @@
cca = None
abort, warn = scmutil.checkportabilityalert(ui)
if abort or warn:
- cca = scmutil.casecollisionauditor(ui, abort, wctx)
+ cca = scmutil.casecollisionauditor(repo.dirstate.sortedfiles())
for f in repo.walk(match):
exact = match.exact(f)
if exact or not explicitonly and f not in repo.dirstate:
- if cca:
- cca(f)
+ if cca and cca.collides(f):
+ msg = _('possible case-folding collision for %s') % f
+ if abort:
+ raise util.Abort(msg)
+ ui.warn(_("warning: %s\n") % msg)
names.append(f)
if ui.verbose or not exact:
ui.status(_('adding %s\n') % match.rel(join(f)))
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -9,6 +9,7 @@
import util, error, osutil, revset, similar, encoding
import match as matchmod
import os, errno, re, stat, sys, glob
+import bisect
def nochangesfound(ui, secretlist=None):
'''report no changes for push/pull'''
@@ -49,22 +50,50 @@
return abort, warn
class casecollisionauditor(object):
- def __init__(self, ui, abort, existingiter):
- self._ui = ui
- self._abort = abort
- self._map = {}
- for f in existingiter:
- self._map[encoding.lower(f)] = f
+ def __init__(self, sortedfiles):
+ self._lowerfiles = set()
+ self._lowereddirs = set()
+ self._sortedfiles = sortedfiles
- def __call__(self, f):
- fl = encoding.lower(f)
- map = self._map
- if fl in map and map[fl] != f:
- msg = _('possible case-folding collision for %s') % f
- if self._abort:
- raise util.Abort(msg)
- self._ui.warn(_("warning: %s\n") % msg)
- map[fl] = f
+ def collides(self, path):
+ """
+ Check if path has a case collision with another file, either
+ one we've checked before or one in a sorted list of files.
+ Returns true iff there is a collision.
+ """
+ dirn = os.path.dirname(path)
+ dirlevel = path.count('/')
+ hit = False
+ if dirn in self._lowereddirs:
+ # All files in dirn (but not subdirs) have been lowercased so we
+ # need only check for the lowered filename in the cache of lowered
+ # files.
+ return encoding.lower(path) in self._lowerfiles
+ else:
+ lowerpath = encoding.lower(path)
+ # If we're checking a/b/c, and we know there's already a file in the
+ # repo in the dir a/b, then we know that the 'a/b' component can not
+ # have a case collision, so we can limit our search to files in the
+ # tree under a/b. The prefixmatch call is to find the longest path
+ # component we can use to limit the search.
+ prefix = util.prefixmatch(path, self._sortedfiles)
+ prefixdirslash = os.path.dirname(prefix) + "/"
+ if prefixdirslash == "/":
+ prefixdirslash = ""
+ startidx = bisect.bisect_right(self._sortedfiles, prefixdirslash)
+ hit = False
+ for idx in xrange(startidx, len(self._sortedfiles)):
+ f = self._sortedfiles[idx]
+ if not f.startswith(prefixdirslash):
+ break
+ if f.count('/') != dirlevel:
+ continue
+ lowerf = encoding.lower(f)
+ self._lowerfiles.add(lowerf)
+ if lowerf == lowerpath and f != path:
+ hit = True
+ self._lowereddirs.add(dirn)
+ return hit
class pathauditor(object):
'''ensure that a filesystem path contains no banned components.
diff --git a/tests/test-casecollision.t b/tests/test-casecollision.t
--- a/tests/test-casecollision.t
+++ b/tests/test-casecollision.t
@@ -31,6 +31,23 @@
$ hg st
A A
A a
+ $ mkdir b
+ $ touch b/c b/D
+ $ hg add b
+ adding b/D
+ adding b/c
+ $ touch b/d b/C
+ $ hg add b/C
+ warning: possible case-folding collision for b/C
+ $ hg add b/d
+ warning: possible case-folding collision for b/d
+ $ touch b/a1 b/a2
+ $ hg add b
+ adding b/a1
+ adding b/a2
+ $ touch b/A2 b/a1.1
+ $ hg add b/a1.1 b/A2
+ warning: possible case-folding collision for b/A2
case changing rename must not warn or abort
More information about the Mercurial-devel
mailing list