[PATCH] use per-directory clustered stat calls even in cases where known tree is walked
Petr Kodl
petrkodl at gmail.com
Wed Oct 1 02:43:26 UTC 2008
# HG changeset patch
# User Petr Kodl <petrkodl at gmail.com>
# Date 1222828993 14400
# Node ID a7700f7f06902e5e99ff680aaa18d548ea2dca0f
# Parent 1859e907cd28d06d1b0c1b11c32723e8767a2290
use per-directory clustered stat calls even in cases where known tree is walked
This allows diff and other calls that walk a known tree to benefit from osutil.c.
Some timings from a clean tree with ~23k files
This latest incarnation tries to migrate most of the OS-specific code
into util.
Performance remains comparable to the previous version of this patch,
where step 2 and step 3 in the walk call were manipulated directly.
diff -r 1859e907cd28 -r a7700f7f0690 mercurial/dirstate.py
--- a/mercurial/dirstate.py Tue Sep 30 22:43:13 2008 -0400
+++ b/mercurial/dirstate.py Tue Sep 30 22:43:13 2008 -0400
@@ -533,20 +533,10 @@
results[nf] = st
elif nf in dmap and matchfn(nf):
results[nf] = None
-
# step 3: report unseen items in the dmap hash
visit = [f for f in dmap if f not in results and match(f)]
- for nf in util.sort(visit):
- results[nf] = None
- try:
- st = lstat(join(nf))
- kind = getkind(st.st_mode)
- if kind == regkind or kind == lnkkind:
- results[nf] = st
- except OSError, inst:
- if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
- raise
-
+ for nf,st in util.statfiles(visit, self._rootdir):
+ results[nf] = st
del results['.hg']
return results
diff -r 1859e907cd28 -r a7700f7f0690 mercurial/util.py
--- a/mercurial/util.py Tue Sep 30 22:43:13 2008 -0400
+++ b/mercurial/util.py Tue Sep 30 22:43:13 2008 -0400
@@ -15,10 +15,8 @@
from i18n import _
import cStringIO, errno, getpass, re, shutil, sys, tempfile
import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
-import imp, urlparse
-
+import imp, urlparse, stat
# Python compatibility
-
try:
set = set
frozenset = frozenset
@@ -799,8 +797,45 @@
'''return true if it is safe to hold open file handles to hardlinks'''
return True
+def statfiles(files, root):
+ '''for each file in files return tuple file, status
+ if file does not exist or is not regular file or link status is None
+ files are assumed to be relative path to root'''
+ lstat = os.lstat
+ getkind = stat.S_IFMT
+ goodkind = (stat.S_IFREG, stat.S_IFLNK)
+ dircache = {}
+ usecache = sys.platform=='win32'
+ for nf in sort(files):
+ st = None
+ if usecache:
+ pos = nf.rfind('/')
+ if pos==-1:
+ dir = '.'
+ base = nf.lower()
+ else:
+ dir = nf[:pos].lower()
+ base = nf[pos+1:].lower()
+ if not dir in dircache:
+ ls = []
+ if os.path.isdir(root + dir):
+ try:
+ ls = [(i[0].lower(), i[2]) for i in
+ osutil.listdir(root + dir, True)]
+ except:
+ pass
+ dircache[dir] = dict(ls)
+ st = dircache[dir].get(base, None)
+ else:
+ try:
+ st = lstat(root + nf)
+ except OSError, inst:
+ if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
+ raise
+ if not st is None and not getkind(st.st_mode) in goodkind:
+ st = None
+ yield nf,st
getuser_fallback = None
-
def getuser():
'''return name of current user'''
try:
More information about the Mercurial-devel
mailing list