[PATCH 08 of 11] util: declare wire protocol support of compression engines

Augie Fackler raf at durin42.com
Mon Nov 21 22:57:42 UTC 2016


On Sun, Nov 20, 2016 at 02:23:45PM -0800, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc at gmail.com>
> # Date 1479679442 28800
> #      Sun Nov 20 14:04:02 2016 -0800
> # Node ID 9e0c42d347fd8bcba87561c92fc93b3ba597ec6f
> # Parent  a550b00e82d531756e73869055123fa6682effe0
> util: declare wire protocol support of compression engines
>
> This patch implements a new compression engine API allowing
> compression engines to declare support for the wire protocol.
>
> Support is declared by returning a 2 character compression format
> identifier that will be added to payloads to signal the compression
> type of data that follows and default integer priorities of the
> engine.
>
> Accessor methods have been added to the compression engine manager
> class to facilitate use.
>
> Note that the "none" and "bz2" engines declare wire protocol support
> but aren't enabled by default due to their priorities being 0. It
> is essentially free to support these compression formats. So why not.
>
> diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt
> --- a/mercurial/help/internals/wireprotocol.txt
> +++ b/mercurial/help/internals/wireprotocol.txt
> @@ -265,6 +265,17 @@ The compression format strings are 2 byt
>  2 byte *header* values at the beginning of ``application/mercurial-0.2``
>  media types (as used by the HTTP transport).
>
> +The identifiers used by the official Mercurial distribution are:
> +
> +BZ
> +   bzip2
> +UN
> +   uncompressed / raw data
> +ZL
> +   zlib (no gzip header)
> +ZS
> +   zstd
> +
>  This capability was introduced in Mercurial 4.1 (released February
>  2017).
>
> diff --git a/mercurial/util.py b/mercurial/util.py
> --- a/mercurial/util.py
> +++ b/mercurial/util.py
> @@ -2966,6 +2966,8 @@ class compressormanager(object):
>          self._bundlenames = {}
>          # Internal bundle identifier to engine name.
>          self._bundletypes = {}
> +        # Wire proto identifier to engine name.
> +        self._wiretypes = {}
>
>      def __getitem__(self, key):
>          return self._engines[key]
> @@ -3007,6 +3009,16 @@ class compressormanager(object):
>
>              self._bundletypes[bundletype] = name
>
> +        wireinfo = engine.wireprotosupport()
> +        if wireinfo:
> +            wiretype = wireinfo[0]
> +            if wiretype in self._wiretypes:
> +                raise error.Abort(_('wire protocol compression %s already '
> +                                    'registered by %s') %
> +                                  (wiretype, self._wiretypes[wiretype]))
> +
> +            self._wiretypes[wiretype] = name
> +
>          self._engines[name] = engine
>
>      @property
> @@ -3043,6 +3055,32 @@ class compressormanager(object):
>                                engine.name())
>          return engine
>
> +    def supportedwireengines(self, role, onlyavailable=False):
> +        """Obtain compression engines that support the wire protocol.
> +
> +        Returns a list of engines in prioritized order, most desired first.
> +
> +        If ``onlyavailable`` is set, filter out engines that can't be

I'd rather be explicit here, so I suggest:

s/set/true (the default)/

> +        loaded.
> +        """
> +        assert role in ('client', 'server')

Can I interest you in having module-level constants for ROLE_CLIENT
and ROLE_SERVER so we don't get these strings passed around like magic
everywhere?


> +
> +        engines = [self._engines[e] for e in self._wiretypes.values()]
> +        if onlyavailable:
> +            engines = [e for e in engines if e.available()]
> +
> +        idx = 1 if role == 'server' else 2
> +
> +        return list(sorted(engines, key=lambda e: e.wireprotosupport()[idx],
> +                           reverse=True))
> +
> +    def forwiretype(self, wiretype):
> +        engine = self._engines[self._wiretypes[wiretype]]
> +        if not engine.available():
> +            raise error.Abort(_('compression engine %s could not be loaded') %
> +                              engine.name())
> +        return engine
> +
>  compengines = compressormanager()
>
>  class compressionengine(object):
> @@ -3083,6 +3121,30 @@ class compressionengine(object):
>          """
>          return None
>
> +    def wireprotosupport(self):
> +        """Declare support for this compression format on the wire protocol.
> +
> +        If this compression engine isn't supported for compressing wire
> +        protocol payloads, returns None.
> +
> +        Otherwise, returns a 3-tuple of the following elements:
> +
> +        * 2 byte format identifier
> +        * Integer priority for the server
> +        * Integer priority for the client
> +
> +        The integer priorities are used to order the advertisement of format
> +        support by server and client. The highest integer is advertised
> +        first. Integers with non-positive values aren't advertised.
> +
> +        The priority values are somewhat arbitrary and only used for default
> +        ordering. The relative order can be changed via config options.
> +
> +        If wire protocol compression is supported, the class must also implement
> +        ``compressstream`` and ``decompressorreader``.
> +        """
> +        return None
> +
>      def compressstream(self, it, opts=None):
>          """Compress an iterator of chunks.
>
> @@ -3111,6 +3173,9 @@ class _zlibengine(compressionengine):
>      def bundletype(self):
>          return 'gzip', 'GZ'
>
> +    def wireprotosupport(self):
> +        return 'ZL', 20, 20
> +
>      def compressstream(self, it, opts=None):
>          opts = opts or {}
>
> @@ -3141,6 +3206,11 @@ class _bz2engine(compressionengine):
>      def bundletype(self):
>          return 'bzip2', 'BZ'
>
> +    # We declare a protocol name but don't advertise by default because
> +    # it is slow.
> +    def wireprotosupport(self):
> +        return 'BZ', 0, 0

I assume not only do we not advertise it but if a client tries to
demand it we'll refuse?

> +
>      def compressstream(self, it, opts=None):
>          opts = opts or {}
>          z = bz2.BZ2Compressor(opts.get('level', 9))
> @@ -3189,6 +3259,12 @@ class _noopengine(compressionengine):
>      def bundletype(self):
>          return 'none', 'UN'
>
> +    # Clients always support uncompressed payloads. Servers don't because
> +    # unless you are on a fast network, uncompressed payloads can easily
> +    # saturate your network pipe.
> +    def wireprotosupport(self):
> +        return 'UN', 0, 10
> +
>      def compressstream(self, it, opts=None):
>          return it
>
> @@ -3219,6 +3295,9 @@ class _zstdengine(compressionengine):
>      def bundletype(self):
>          return 'zstd', 'ZS'
>
> +    def wireprotosupport(self):
> +        return 'ZS', 50, 50
> +
>      def compressstream(self, it, opts=None):
>          opts = opts or {}
>          # zstd level 3 is almost always significantly faster than zlib



More information about the Mercurial-devel mailing list