[PATCH 08 of 11] util: declare wire protocol support of compression engines
Augie Fackler
raf at durin42.com
Mon Nov 21 22:57:42 UTC 2016
On Sun, Nov 20, 2016 at 02:23:45PM -0800, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc at gmail.com>
> # Date 1479679442 28800
> # Sun Nov 20 14:04:02 2016 -0800
> # Node ID 9e0c42d347fd8bcba87561c92fc93b3ba597ec6f
> # Parent a550b00e82d531756e73869055123fa6682effe0
> util: declare wire protocol support of compression engines
>
> This patch implements a new compression engine API allowing
> compression engines to declare support for the wire protocol.
>
> Support is declared by returning a 2 character compression format
> identifier that will be added to payloads to signal the compression
> type of data that follows and default integer priorities of the
> engine.
>
> Accessor methods have been added to the compression engine manager
> class to facilitate use.
>
> Note that the "none" and "bz2" engines declare wire protocol support
> but aren't enabled by default due to their priorities being 0. It
> is essentially free to support these compression formats. So why not.
>
> diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt
> --- a/mercurial/help/internals/wireprotocol.txt
> +++ b/mercurial/help/internals/wireprotocol.txt
> @@ -265,6 +265,17 @@ The compression format strings are 2 byt
> 2 byte *header* values at the beginning of ``application/mercurial-0.2``
> media types (as used by the HTTP transport).
>
> +The identifiers used by the official Mercurial distribution are:
> +
> +BZ
> + bzip2
> +UN
> + uncompressed / raw data
> +ZL
> + zlib (no gzip header)
> +ZS
> + zstd
> +
> This capability was introduced in Mercurial 4.1 (released February
> 2017).
>
> diff --git a/mercurial/util.py b/mercurial/util.py
> --- a/mercurial/util.py
> +++ b/mercurial/util.py
> @@ -2966,6 +2966,8 @@ class compressormanager(object):
> self._bundlenames = {}
> # Internal bundle identifier to engine name.
> self._bundletypes = {}
> + # Wire proto identifier to engine name.
> + self._wiretypes = {}
>
> def __getitem__(self, key):
> return self._engines[key]
> @@ -3007,6 +3009,16 @@ class compressormanager(object):
>
> self._bundletypes[bundletype] = name
>
> + wireinfo = engine.wireprotosupport()
> + if wireinfo:
> + wiretype = wireinfo[0]
> + if wiretype in self._wiretypes:
> + raise error.Abort(_('wire protocol compression %s already '
> + 'registered by %s') %
> + (wiretype, self._wiretypes[wiretype]))
> +
> + self._wiretypes[wiretype] = name
> +
> self._engines[name] = engine
>
> @property
> @@ -3043,6 +3055,32 @@ class compressormanager(object):
> engine.name())
> return engine
>
> + def supportedwireengines(self, role, onlyavailable=False):
> + """Obtain compression engines that support the wire protocol.
> +
> + Returns a list of engines in prioritized order, most desired first.
> +
> + If ``onlyavailable`` is set, filter out engines that can't be
I'd rather be explicit here, so I suggest:
s/set/true (the default)/
> + loaded.
> + """
> + assert role in ('client', 'server')
Can I interest you in having module-level constants for ROLE_CLIENT
and ROLE_SERVER so we don't get these strings passed around like magic
everywhere?
> +
> + engines = [self._engines[e] for e in self._wiretypes.values()]
> + if onlyavailable:
> + engines = [e for e in engines if e.available()]
> +
> + idx = 1 if role == 'server' else 2
> +
> + return list(sorted(engines, key=lambda e: e.wireprotosupport()[idx],
> + reverse=True))
> +
> + def forwiretype(self, wiretype):
> + engine = self._engines[self._wiretypes[wiretype]]
> + if not engine.available():
> + raise error.Abort(_('compression engine %s could not be loaded') %
> + engine.name())
> + return engine
> +
> compengines = compressormanager()
>
> class compressionengine(object):
> @@ -3083,6 +3121,30 @@ class compressionengine(object):
> """
> return None
>
> + def wireprotosupport(self):
> + """Declare support for this compression format on the wire protocol.
> +
> + If this compression engine isn't supported for compressing wire
> + protocol payloads, returns None.
> +
> + Otherwise, returns a 3-tuple of the following elements:
> +
> + * 2 byte format identifier
> + * Integer priority for the server
> + * Integer priority for the client
> +
> + The integer priorities are used to order the advertisement of format
> + support by server and client. The highest integer is advertised
> + first. Integers with non-positive values aren't advertised.
> +
> + The priority values are somewhat arbitrary and only used for default
> + ordering. The relative order can be changed via config options.
> +
> + If wire protocol compression is supported, the class must also implement
> + ``compressstream`` and ``decompressorreader``.
> + """
> + return None
> +
> def compressstream(self, it, opts=None):
> """Compress an iterator of chunks.
>
> @@ -3111,6 +3173,9 @@ class _zlibengine(compressionengine):
> def bundletype(self):
> return 'gzip', 'GZ'
>
> + def wireprotosupport(self):
> + return 'ZL', 20, 20
> +
> def compressstream(self, it, opts=None):
> opts = opts or {}
>
> @@ -3141,6 +3206,11 @@ class _bz2engine(compressionengine):
> def bundletype(self):
> return 'bzip2', 'BZ'
>
> + # We declare a protocol name but don't advertise by default because
> + # it is slow.
> + def wireprotosupport(self):
> + return 'BZ', 0, 0
I assume not only do we not advertise it but if a client tries to
demand it we'll refuse?
> +
> def compressstream(self, it, opts=None):
> opts = opts or {}
> z = bz2.BZ2Compressor(opts.get('level', 9))
> @@ -3189,6 +3259,12 @@ class _noopengine(compressionengine):
> def bundletype(self):
> return 'none', 'UN'
>
> + # Clients always support uncompressed payloads. Servers don't because
> + # unless you are on a fast network, uncompressed payloads can easily
> + # saturate your network pipe.
> + def wireprotosupport(self):
> + return 'UN', 0, 10
> +
> def compressstream(self, it, opts=None):
> return it
>
> @@ -3219,6 +3295,9 @@ class _zstdengine(compressionengine):
> def bundletype(self):
> return 'zstd', 'ZS'
>
> + def wireprotosupport(self):
> + return 'ZS', 50, 50
> +
> def compressstream(self, it, opts=None):
> opts = opts or {}
> # zstd level 3 is almost always significantly faster than zlib
More information about the Mercurial-devel
mailing list