[PATCH 3 of 3] lfs: migrate file filtering from threshold to custom filter

Matt Harbison mharbison72 at gmail.com
Fri Jan 5 04:58:56 UTC 2018


# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1514706889 18000
#      Sun Dec 31 02:54:49 2017 -0500
# Node ID 860f79f99faded4b711381b827eced30c0009e31
# Parent  8c20ade835ce43441c61e56e63d9bf92deaacd55
lfs: migrate file filtering from threshold to custom filter

This patch was authored by Jun Wu for the fb-experimental repo, to avoid using
a matcher, for efficiency[1].  All I've changed here is to register the new
'lfs.track' default, so that the tests run cleanly.  Migrating the remaining
uses of 'lfs.threshold' can be done separately since there's a fallback in
place.

Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618, so
people can specify which files are stored in LFS with more flexibility.

[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109388.html

diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -19,8 +19,17 @@
     # (default: unset)
     url = https://example.com/lfs
 
-    # size of a file to make it use LFS
-    threshold = 10M
+    # Which files to track in LFS. It could be a combination of ".extname",
+    # ">size", "/under/some/directory" with logic operations "|" (or), "&"
+    # (and), "!" (not) and parentheses. Some examples:
+    # - always                # everything
+    # - >20MB                 # larger than 20MB
+    # - !.txt                 # except for .txt files
+    # - .zip | .tar.gz | .7z  # some types of compressed files
+    # -  /bin                 # files under "bin" in the project root
+    # - (.php & >2MB) | (.js & >5MB) | .tar.gz | (/bin & !/bin/README) | >1GB
+    # (default: !always (never))
+    track = >10M
 
     # how many times to retry before giving up on transferring an object
     retry = 5
@@ -54,6 +63,7 @@
 
 from . import (
     blobstore,
+    filterlang,
     wrapper,
 )
 
@@ -76,9 +86,13 @@
 configitem('lfs', 'usercache',
     default=None,
 )
+# Deprecated
 configitem('lfs', 'threshold',
     default=None,
 )
+configitem('lfs', 'track',
+    default='!always',
+)
 configitem('lfs', 'retry',
     default=5,
 )
@@ -112,9 +126,14 @@
     if not repo.local():
         return
 
-    threshold = repo.ui.configbytes('lfs', 'threshold')
+    trackspec = repo.ui.config('lfs', 'track')
 
-    repo.svfs.options['lfsthreshold'] = threshold
+    # deprecated config: lfs.threshold
+    threshold = repo.ui.configbytes('lfs', 'threshold')
+    if threshold:
+        trackspec = "(%s) | >%s" % (trackspec, threshold)
+
+    repo.svfs.options['lfstrack'] = filterlang.compile(trackspec)
     repo.svfs.lfslocalblobstore = blobstore.local(repo)
     repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
 
@@ -143,6 +162,7 @@
 def wrapfilelog(filelog):
     wrapfunction = extensions.wrapfunction
 
+    wrapfunction(filelog, '__init__', wrapper.fileloginit)
     wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
     wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
     wrapfunction(filelog, 'size', wrapper.filelogsize)
diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
--- a/hgext/lfs/wrapper.py
+++ b/hgext/lfs/wrapper.py
@@ -120,17 +120,22 @@
     flags = rlog.flags(rev)
     return bool(flags & revlog.REVIDX_EXTSTORED)
 
+def fileloginit(orig, self, opener, path, *args, **kwargs):
+    # record the path; filelogaddrevision passes it to the lfstrack filter
+    self.filename = path
+    orig(self, opener, path, *args, **kwargs)
+
 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
-    threshold = self.opener.options['lfsthreshold']
     textlen = len(text)
     # exclude hg rename meta from file size
     meta, offset = filelog.parsemeta(text)
     if offset:
         textlen -= offset
 
-    if threshold and textlen > threshold:
+    lfstrack = self.opener.options['lfstrack']
+    if lfstrack(self.filename, textlen):
         flags |= revlog.REVIDX_EXTSTORED
 
     return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -30,7 +30,7 @@
   > lfs=
   > [lfs]
   > url=http://foo:bar@$LFS_HOST/
-  > threshold=1
+  > track=always
   > EOF
 
   $ hg init repo1
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -140,7 +140,7 @@
   $ cd repo3
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > EOF
 
   $ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
@@ -203,7 +203,7 @@
   $ cd repo6
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=30B
+  > track=>30B
   > EOF
 
   $ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
@@ -239,7 +239,7 @@
   $ cd repo8
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > EOF
 
   $ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
@@ -320,7 +320,7 @@
   $ cd repo9
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > [diff]
   > git=1
   > EOF
@@ -451,10 +451,8 @@
   $ hg init repo10
   $ cd repo10
   $ cat >> .hg/hgrc << EOF
-  > [extensions]
-  > lfs=
   > [lfs]
-  > threshold=1
+  > track=always
   > EOF
   $ $PYTHON <<'EOF'
   > def write(path, content):
@@ -542,6 +540,46 @@
 
   $ cd ..
 
+# Test filter
+
+  $ hg init repo11
+  $ cd repo11
+  $ cat >> .hg/hgrc << EOF
+  > [lfs]
+  > track=(.a & >5B) | (.b & !>5B) | (.c & /d & !/d/c.c) | >10B
+  > EOF
+
+  $ mkdir a
+  $ echo aaaaaa > a/1.a
+  $ echo a > a/2.a
+  $ echo aaaaaa > 1.b
+  $ echo a > 2.b
+  $ echo a > 1.c
+  $ mkdir d
+  $ echo a > d/c.c
+  $ echo a > d/d.c
+  $ echo aaaaaaaaaaaa > x
+  $ hg add . -q
+  $ hg commit -m files
+
+  $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+  >   if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+  >     echo "${p}: is lfs"
+  >   else
+  >     echo "${p}: not lfs"
+  >   fi
+  > done
+  a/1.a: is lfs
+  a/2.a: not lfs
+  1.b: not lfs
+  2.b: is lfs
+  1.c: not lfs
+  d/c.c: not lfs
+  d/d.c: is lfs
+  x: is lfs
+
+  $ cd ..
+
 # Verify the repos
 
   $ cat > $TESTTMP/dumpflog.py << EOF



More information about the Mercurial-devel mailing list