[PATCH] .hgskip support

Matt Mackall mpm at selenic.com
Thu Jul 7 08:13:58 UTC 2005


On Thu, Jul 07, 2005 at 04:02:21AM +0100, Mark Williamson wrote:
> This has got the times down to about 370ms for both the large and
> small repositories. I suspect much of the rest of the performance is
> going in the extra regexp checking, though: our .hgignore file has
> 183 entries. We're now having to test the match on every directory,
> rather than just for files we don't know about.

Perhaps something like this, which builds a single big ignore regex
out of all the patterns, will help:

diff -r 8c89408a7154 mercurial/hg.py
--- a/mercurial/hg.py	Thu Jul  7 06:41:56 2005
+++ b/mercurial/hg.py	Thu Jul  7 01:07:21 2005
@@ -487,7 +487,7 @@
         self.wopener = opener(self.root)
         self.manifest = manifest(self.opener)
         self.changelog = changelog(self.opener)
-        self.ignorelist = None
+        self.ignorefunc = None
         self.tagscache = None
         self.nodetagscache = None
 
@@ -498,17 +498,22 @@
             except IOError: pass
 
     def ignore(self, f):
-        if self.ignorelist is None:
-            self.ignorelist = []
+        if not self.ignorefunc:
+            bigpat = []
             try:
                 l = file(self.wjoin(".hgignore"))
                 for pat in l:
                     if pat != "\n":
-                        self.ignorelist.append(re.compile(util.pconvert(pat[:-1])))
+                        bigpat.append(util.pconvert(pat[:-1]))
             except IOError: pass
-        for pat in self.ignorelist:
-            if pat.search(f): return True
-        return False
+            if bigpat:
+                s = "(%s)" % (")|(".join(bigpat))
+                r = re.compile(s)
+                self.ignorefunc = lambda x: r.search(x)
+            else:
+                self.ignorefunc = lambda x: False
+
+        return self.ignorefunc(f)
 
     def hook(self, name, **args):
         s = self.ui.config("hooks", name)


Works here, but you've obviously got a more demanding test environment
than I do, so give it a spin.

> diff -r 8b8f710bb658 -r 9d971c2fd3bb mercurial/hg.py
> --- a/mercurial/hg.py	Wed Jul  6 02:23:56 2005
> +++ b/mercurial/hg.py	Thu Jul  7 02:52:35 2005
> @@ -297,33 +297,46 @@
>                  if os.path.isdir(f):
>                      for dir, subdirs, fl in os.walk(f):
>                          d = dir[len(self.root) + 1:]
> -                        if ".hg" in subdirs: subdirs.remove(".hg")
> +                        if ".hg" in subdirs:
> +                            subdirs.remove(".hg")

I should just work this ".hg" exclusion into ignore() somehow.

> +                        for sd in subdirs:
> +                            if ignore(os.path.join(d, sd + '/')):
> +                                subdirs.remove(sd)
>                          for fn in fl:
>                              fn = util.pconvert(os.path.join(d, fn))
>                              yield fn
>                  else:
>                      yield f[len(self.root) + 1:]
>  
> -        for fn in util.unique(walk(files)):
> +        seen = {}
> +
> +        for fn in dc:
>              try: s = os.stat(os.path.join(self.root, fn))
>              except: continue
>  
> -            if fn in dc:
> -                c = dc[fn]
> -                del dc[fn]
> -
> -                if c[0] == 'm':
> -                    changed.append(fn)
> -                elif c[0] == 'a':
> -                    added.append(fn)
> -                elif c[0] == 'r':
> -                    unknown.append(fn)
> -                elif c[2] != s.st_size or (c[1] ^ s.st_mode) & 0100:
> -                    changed.append(fn)
> -                elif c[1] != s.st_mode or c[3] != s.st_mtime:
> -                    lookup.append(fn)
> -            else:
> -                if not ignore(fn): unknown.append(fn)
> +            c = dc[fn]
> +
> +            if c[0] == 'm':
> +                changed.append(fn)
> +            elif c[0] == 'a':
> +                added.append(fn)
> +            elif c[0] == 'r':
> +                unknown.append(fn)
> +            elif c[2] != s.st_size or (c[1] ^ s.st_mode) & 0100:
> +                changed.append(fn)
> +            elif c[1] != s.st_mode or c[3] != s.st_mtime:
> +                lookup.append(fn)
> +
> +            seen[fn] = 1
> +
> +        for fn in seen:
> +            del dc[fn]
> +
> +        for fn in util.unique(walk(files), seen):
> +            try: s = os.stat(os.path.join(self.root, fn))
> +            except: continue
> +
> +            if not ignore(fn): unknown.append(fn)

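I don't like this much: it needs the extra "seen" bookkeeping and a
second pass just to delete entries from dc. Perhaps we can just iterate
over a copy of dc.keys() instead?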

> +def unique(g, seen=None):
> +    if seen == None:
> +        seen = {}

Hmm, I'd completely forgotten about Python's default argument wart here
(a mutable default such as seen={} is created once and shared between
calls, hence the None check).

-- 
Mathematics is the supreme nostalgia of our time.



More information about the Mercurial mailing list