[PATCH] Proposed patch: support for Python functions as .hgignore filters

Boris Figovsky borfig at gmail.com
Mon Jun 28 20:07:45 UTC 2010


Hello,
In a project I work on, our build system generates a lot of files in
the working directory,
and we wanted Mercurial to ignore them, but the current .hgignore
syntax is not expressive enough.
We thought we could use Python functions, such as os.path.islink or
an is_ignored() function defined in mymodule.py.
The attached patch is the outcome.

# HG changeset patch
# User Boris Figovsky <borfig at gmail.com>
# Date 1277754913 -10800
# Node ID 82a012d54714d2e091477a63a59434ececb873bc
# Parent  8b452fe4bf506a1a08bbc7e1bac81aceda8f4d10
support for Python functions as .hgignore filter
diff -r 8b452fe4bf50 -r 82a012d54714 doc/hgignore.5.txt
--- a/doc/hgignore.5.txt Mon Jun 21 17:02:48 2010 -0300
+++ b/doc/hgignore.5.txt Mon Jun 28 22:55:13 2010 +0300
@@ -66,6 +66,8 @@
   Regular expression, Python/Perl syntax.
 ``glob``
   Shell-style glob.
+``python``
+  Python function names.

 The chosen syntax stays in effect when parsing all patterns that
 follow, until another syntax is selected.
@@ -75,6 +77,11 @@
 and a regexp pattern of the form ``\.c$`` will do the same. To root a
 regexp pattern, start it with ``^``.

+A Python function name has the form path:[submodule.]func, where path
+is a python file or package, or module.[submodule.[...]]func.
+The function must receive exactly one file-name and return True if it
+is to be ignored.
+
 Example
 -------

@@ -91,6 +98,9 @@
   syntax: regexp
   ^\.pc/

+  # We don't like symlinks
+  python:os.path.islink
+
 Author
 ------
 Vadim Gelfer <vadim.gelfer at gmail.com>
diff -r 8b452fe4bf50 -r 82a012d54714 mercurial/ignore.py
--- a/mercurial/ignore.py Mon Jun 21 17:02:48 2010 -0300
+++ b/mercurial/ignore.py Mon Jun 28 22:55:13 2010 +0300
@@ -15,7 +15,7 @@
     '''parse lines (iterable) of .hgignore text, returning a tuple of
     (patterns, parse errors). These patterns should be given to compile()
     to be validated and converted into a match function.'''
-    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
+    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:', 'python':'python:'}
     syntax = 'relre:'
     patterns = []
     warnings = []
diff -r 8b452fe4bf50 -r 82a012d54714 mercurial/match.py
--- a/mercurial/match.py Mon Jun 21 17:02:48 2010 -0300
+++ b/mercurial/match.py Mon Jun 28 22:55:13 2010 +0300
@@ -7,6 +7,7 @@

 import re
 import util
+import extensions

 class match(object):
     def __init__(self, root, cwd, patterns, include=[], exclude=[],
@@ -29,6 +30,7 @@
         'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
         'relpath:<path>' - a path relative to cwd
         'relre:<regexp>' - a regexp that needn't match the start of a name
+        'python:[import_path:]module.func' - a function that receives the name and returns True if it matches
         '<something>' - a pattern of the specified default type
         """

@@ -115,7 +117,7 @@
     actual pattern."""
     if ':' in pat:
         kind, val = pat.split(':', 1)
-        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
+        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', 'python'):
             return kind, val
     return default, pat

@@ -193,8 +195,41 @@
         return '.*' + name
     return _globre(name) + tail

+def _extended_getattr(obj, attrs):
+    for attr in attrs.split('.'):
+        obj = getattr(obj, attr)
+    return obj
+
+def _pythonmatch(function_pattern):
+    if ':' in function_pattern: # load from a module
+        path, rest = function_pattern.rsplit(':', 1)
+        mod = extensions.loadpath(path, path.replace('.', '_').replace('/','__'))
+    else: # __import__ should work?
+        module_name, rest = function_pattern.split('.',1)
+        mod = __import__(module_name)
+
+    func = _extended_getattr(mod, rest)
+    return func
+
 def _buildmatch(pats, tail):
     """build a matching function from a set of patterns"""
+
+    matchs = []
+    non_python_pats = []
+    for k, p in pats:
+        if k != 'python':
+            non_python_pats.append((k, p))
+            continue
+        if non_python_pats:
+            matchs.append(_regexmatch(pats, tail))
+            del non_python_pats[:]
+        matchs.append(_pythonmatch(p))
+    if non_python_pats:
+        matchs.append(_regexmatch(pats, tail))
+    return lambda s: any(m(s) for m in matchs)
+
+def _regexmatch(pats, tail):
+    """build a matching function from a set of regexable patterns"""
     try:
         pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
         if len(pat) > 20000:
@@ -207,7 +242,7 @@
         l = len(pats)
         if l < 2:
             raise
-        a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
+        a, b = _regexmatch(pats[:l//2], tail), _regexmatch(pats[l//2:], tail)
         return lambda s: a(s) or b(s)
     except re.error:
         for k, p in pats:
diff -r 8b452fe4bf50 -r 82a012d54714 tests/test-hgignore
--- a/tests/test-hgignore Mon Jun 21 17:02:48 2010 -0300
+++ b/tests/test-hgignore Mon Jun 28 22:55:13 2010 +0300
@@ -69,3 +69,27 @@

 cd dir
 echo "--" ; hg status .
+
+# new feature: python pattern for .hgignore
+cd ..
+rm .hgignore
+touch .hgignore
+ln -s a.c alink.c
+echo "--" ; hg status
+echo "python:os.path.islink" > .hgignore
+echo "--" ; hg status
+echo "glob:*.o" > .hgignore
+echo "syntax: python" >> .hgignore
+echo "os.path.islink" >> .hgignore
+echo "--" ; hg status
+cat <<EOF > ignorer.py
+def ignore(filename):
+    print 'checking', filename
+    return 'py' in filename
+EOF
+
+echo "glob:*.o" > .hgignore
+echo "syntax: python" >> .hgignore
+echo "python:`pwd`/ignorer.py:ignore" >> .hgignore
+echo "glob:*.c" >> .hgignore
+echo "--" ; hg status
diff -r 8b452fe4bf50 -r 82a012d54714 tests/test-hgignore.out
--- a/tests/test-hgignore.out Mon Jun 21 17:02:48 2010 -0300
+++ b/tests/test-hgignore.out Mon Jun 28 22:55:13 2010 +0300
@@ -57,3 +57,32 @@
 A dir/b.o
 --
 A b.o
+--
+A dir/b.o
+? .hgignore
+? a.c
+? a.o
+? alink.c
+? dir/c.o
+? syntax
+--
+A dir/b.o
+? .hgignore
+? a.c
+? a.o
+? dir/c.o
+? syntax
+--
+A dir/b.o
+? .hgignore
+? a.c
+? syntax
+--
+checking ignorer.pyc
+checking ignorer.py
+checking .hgignore
+checking dir
+checking syntax
+A dir/b.o
+? .hgignore
+? syntax



More information about the Mercurial-devel mailing list