[PATCH 3 of 3] lfs: migrate file filtering from threshold to custom filter
Matt Harbison
mharbison72 at gmail.com
Fri Jan 5 04:58:56 UTC 2018
# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1514706889 18000
# Sun Dec 31 02:54:49 2017 -0500
# Node ID 860f79f99faded4b711381b827eced30c0009e31
# Parent 8c20ade835ce43441c61e56e63d9bf92deaacd55
lfs: migrate file filtering from threshold to custom filter
This patch was authored by Jun Wu for the fb-experimental repo, to avoid using
a matcher, for efficiency[1]. The only change I've made here is registering the
new 'lfs.track' default, so that the tests run cleanly. Migrating the remaining
uses of 'lfs.threshold' can be done separately, since there's a fallback in
place.
Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618
(registered as `lfs.track` in this patch), so people can specify which files
are stored in LFS with more flexibility.
[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109388.html
diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -19,8 +19,17 @@
# (default: unset)
url = https://example.com/lfs
- # size of a file to make it use LFS
- threshold = 10M
+ # Which files to track in LFS. It could be a combination of ".extname",
+ # ">size", "/under/some/directory" with logic operations "|" (or), "&"
+ # (and), "!" (not) and parentheses. Some examples:
+ # - always # everything
+ # - >20MB # larger than 20MB
+ # - !.txt # except for .txt files
+ # - .zip | .tar.gz | .7z # some types of compressed files
+ # - /bin # files under "bin" in the project root
+ # - (.php & >2MB) | (.js & >5MB) | .tar.gz | (/bin & !/bin/README) | >1GB
+ # (default: !always (never))
+ track = >10M
# how many times to retry before giving up on transferring an object
retry = 5
@@ -54,6 +63,7 @@
from . import (
blobstore,
+ filterlang,
wrapper,
)
@@ -76,9 +86,13 @@
configitem('lfs', 'usercache',
default=None,
)
+# Deprecated
configitem('lfs', 'threshold',
default=None,
)
+configitem('lfs', 'track',
+ default='!always',
+)
configitem('lfs', 'retry',
default=5,
)
@@ -112,9 +126,14 @@
if not repo.local():
return
- threshold = repo.ui.configbytes('lfs', 'threshold')
+ trackspec = repo.ui.config('lfs', 'track')
- repo.svfs.options['lfsthreshold'] = threshold
+ # deprecated config: lfs.threshold
+ threshold = repo.ui.configbytes('lfs', 'threshold')
+ if threshold:
+ trackspec = "(%s) | >%s" % (trackspec, threshold)
+
+ repo.svfs.options['lfstrack'] = filterlang.compile(trackspec)
repo.svfs.lfslocalblobstore = blobstore.local(repo)
repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
@@ -143,6 +162,7 @@
def wrapfilelog(filelog):
wrapfunction = extensions.wrapfunction
+ wrapfunction(filelog, '__init__', wrapper.fileloginit)
wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
wrapfunction(filelog, 'size', wrapper.filelogsize)
diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
--- a/hgext/lfs/wrapper.py
+++ b/hgext/lfs/wrapper.py
@@ -120,17 +120,22 @@
flags = rlog.flags(rev)
return bool(flags & revlog.REVIDX_EXTSTORED)
+def fileloginit(orig, self, opener, path, *args, **kwargs):
+ # record filename so it can be tested in addrevision
+ self.filename = path
+ orig(self, opener, path, *args, **kwargs)
+
def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
cachedelta=None, node=None,
flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
- threshold = self.opener.options['lfsthreshold']
textlen = len(text)
# exclude hg rename meta from file size
meta, offset = filelog.parsemeta(text)
if offset:
textlen -= offset
- if threshold and textlen > threshold:
+ lfstrack = self.opener.options['lfstrack']
+ if lfstrack(self.filename, textlen):
flags |= revlog.REVIDX_EXTSTORED
return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -30,7 +30,7 @@
> lfs=
> [lfs]
> url=http://foo:bar@$LFS_HOST/
- > threshold=1
+ > track=always
> EOF
$ hg init repo1
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -140,7 +140,7 @@
$ cd repo3
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> EOF
$ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
@@ -203,7 +203,7 @@
$ cd repo6
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=30B
+ > track=>30B
> EOF
$ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
@@ -239,7 +239,7 @@
$ cd repo8
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> EOF
$ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
@@ -320,7 +320,7 @@
$ cd repo9
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> [diff]
> git=1
> EOF
@@ -451,10 +451,8 @@
$ hg init repo10
$ cd repo10
$ cat >> .hg/hgrc << EOF
- > [extensions]
- > lfs=
> [lfs]
- > threshold=1
+ > track=always
> EOF
$ $PYTHON <<'EOF'
> def write(path, content):
@@ -542,6 +540,46 @@
$ cd ..
+# Test filter
+
+ $ hg init repo11
+ $ cd repo11
+ $ cat >> .hg/hgrc << EOF
+ > [lfs]
+ > track=(.a & >5B) | (.b & !>5B) | (.c & /d & !/d/c.c) | >10B
+ > EOF
+
+ $ mkdir a
+ $ echo aaaaaa > a/1.a
+ $ echo a > a/2.a
+ $ echo aaaaaa > 1.b
+ $ echo a > 2.b
+ $ echo a > 1.c
+ $ mkdir d
+ $ echo a > d/c.c
+ $ echo a > d/d.c
+ $ echo aaaaaaaaaaaa > x
+ $ hg add . -q
+ $ hg commit -m files
+
+ $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+ > if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+ > echo "${p}: is lfs"
+ > else
+ > echo "${p}: not lfs"
+ > fi
+ > done
+ a/1.a: is lfs
+ a/2.a: not lfs
+ 1.b: not lfs
+ 2.b: is lfs
+ 1.c: not lfs
+ d/c.c: not lfs
+ d/d.c: is lfs
+ x: is lfs
+
+ $ cd ..
+
# Verify the repos
$ cat > $TESTTMP/dumpflog.py << EOF
More information about the Mercurial-devel
mailing list