D2851: wireproto: define and implement protocol for issuing requests
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Thu Mar 15 01:11:29 UTC 2018
indygreg updated this revision to Diff 7047.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D2851?vs=7017&id=7047
REVISION DETAIL
https://phab.mercurial-scm.org/D2851
AFFECTED FILES
mercurial/debugcommands.py
mercurial/help/internals/wireprotocol.txt
mercurial/wireprotoframing.py
mercurial/wireprotoserver.py
tests/test-http-api-httpv2.t
CHANGE DETAILS
diff --git a/tests/test-http-api-httpv2.t b/tests/test-http-api-httpv2.t
--- a/tests/test-http-api-httpv2.t
+++ b/tests/test-http-api-httpv2.t
@@ -1,5 +1,5 @@
$ HTTPV2=exp-http-v2-0001
- $ MEDIATYPE=application/mercurial-tbd
+ $ MEDIATYPE=application/mercurial-exp-framing-0001
$ send() {
> hg --verbose debugwireproto --peer raw http://$LOCALIP:$HGPORT/
@@ -105,9 +105,9 @@
s> Server: testing stub value\r\n
s> Date: $HTTP_DATE$\r\n
s> Content-Type: text/plain\r\n
- s> Content-Length: 72\r\n
+ s> Content-Length: 85\r\n
s> \r\n
- s> client MUST specify Accept header with value: application/mercurial-tbd\n
+ s> client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n
Bad Accept header results in 406
@@ -128,9 +128,9 @@
s> Server: testing stub value\r\n
s> Date: $HTTP_DATE$\r\n
s> Content-Type: text/plain\r\n
- s> Content-Length: 72\r\n
+ s> Content-Length: 85\r\n
s> \r\n
- s> client MUST specify Accept header with value: application/mercurial-tbd\n
+ s> client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n
Bad Content-Type header results in 415
@@ -143,7 +143,7 @@
using raw connection to peer
s> POST /api/exp-http-v2-0001/ro/capabilities HTTP/1.1\r\n
s> Accept-Encoding: identity\r\n
- s> accept: application/mercurial-tbd\r\n
+ s> accept: application/mercurial-exp-framing-0001\r\n
s> content-type: badmedia\r\n
s> user-agent: test\r\n
s> host: $LOCALIP:$HGPORT\r\n (glob)
@@ -153,26 +153,29 @@
s> Server: testing stub value\r\n
s> Date: $HTTP_DATE$\r\n
s> Content-Type: text/plain\r\n
- s> Content-Length: 75\r\n
+ s> Content-Length: 88\r\n
s> \r\n
- s> client MUST send Content-Type header with value: application/mercurial-tbd\n
+ s> client MUST send Content-Type header with value: application/mercurial-exp-framing-0001\n
Request to read-only command works out of the box
$ send << EOF
> httprequest POST api/$HTTPV2/ro/capabilities
> accept: $MEDIATYPE
> content-type: $MEDIATYPE
> user-agent: test
+ > frame command-name eos capabilities
> EOF
using raw connection to peer
s> POST /api/exp-http-v2-0001/ro/capabilities HTTP/1.1\r\n
s> Accept-Encoding: identity\r\n
- s> accept: application/mercurial-tbd\r\n
- s> content-type: application/mercurial-tbd\r\n
+ s> accept: application/mercurial-exp-framing-0001\r\n
+ s> content-type: application/mercurial-exp-framing-0001\r\n
s> user-agent: test\r\n
+ s> content-length: 16\r\n
s> host: $LOCALIP:$HGPORT\r\n (glob)
s> \r\n
+ s> \x0c\x00\x00\x11capabilities
s> makefile('rb', None)
s> HTTP/1.1 200 OK\r\n
s> Server: testing stub value\r\n
@@ -268,15 +271,18 @@
> user-agent: test
> accept: $MEDIATYPE
> content-type: $MEDIATYPE
+ > frame command-name eos capabilities
> EOF
using raw connection to peer
s> POST /api/exp-http-v2-0001/rw/capabilities HTTP/1.1\r\n
s> Accept-Encoding: identity\r\n
- s> accept: application/mercurial-tbd\r\n
- s> content-type: application/mercurial-tbd\r\n
+ s> accept: application/mercurial-exp-framing-0001\r\n
+ s> content-type: application/mercurial-exp-framing-0001\r\n
s> user-agent: test\r\n
+ s> content-length: 16\r\n
s> host: $LOCALIP:$HGPORT\r\n (glob)
s> \r\n
+ s> \x0c\x00\x00\x11capabilities
s> makefile('rb', None)
s> HTTP/1.1 200 OK\r\n
s> Server: testing stub value\r\n
@@ -296,7 +302,7 @@
using raw connection to peer
s> POST /api/exp-http-v2-0001/rw/badcommand HTTP/1.1\r\n
s> Accept-Encoding: identity\r\n
- s> accept: application/mercurial-tbd\r\n
+ s> accept: application/mercurial-exp-framing-0001\r\n
s> user-agent: test\r\n
s> host: $LOCALIP:$HGPORT\r\n (glob)
s> \r\n
diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -32,7 +32,7 @@
HGTYPE = 'application/mercurial-0.1'
HGTYPE2 = 'application/mercurial-0.2'
HGERRTYPE = 'application/hg-error'
-HTTPV2TYPE = 'application/mercurial-tbd'
+FRAMINGTYPE = b'application/mercurial-exp-framing-0001'
HTTPV2 = wireprototypes.HTTPV2
SSHV1 = wireprototypes.SSHV1
@@ -336,21 +336,21 @@
res.setbodybytes(_('invalid wire protocol command: %s') % command)
return
- if req.headers.get(b'Accept') != HTTPV2TYPE:
+ if req.headers.get(b'Accept') != FRAMINGTYPE:
res.status = b'406 Not Acceptable'
res.headers[b'Content-Type'] = b'text/plain'
res.setbodybytes(_('client MUST specify Accept header with value: %s\n')
- % HTTPV2TYPE)
+ % FRAMINGTYPE)
return
if (b'Content-Type' in req.headers
- and req.headers[b'Content-Type'] != HTTPV2TYPE):
+ and req.headers[b'Content-Type'] != FRAMINGTYPE):
res.status = b'415 Unsupported Media Type'
# TODO we should send a response with appropriate media type,
# since client does Accept it.
res.headers[b'Content-Type'] = b'text/plain'
res.setbodybytes(_('client MUST send Content-Type header with '
- 'value: %s\n') % HTTPV2TYPE)
+ 'value: %s\n') % FRAMINGTYPE)
return
# We don't do anything meaningful yet.
diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py
new file mode 100644
--- /dev/null
+++ b/mercurial/wireprotoframing.py
@@ -0,0 +1,156 @@
+# wireprotoframing.py - unified framing protocol for wire protocol
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc at gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+# This file contains functionality to support the unified frame-based wire
+# protocol. For details about the protocol, see
+# `hg help internals.wireprotocol`.
+
+from __future__ import absolute_import
+
+import struct
+
+from . import (
+ util,
+)
+
+FRAME_HEADER_SIZE = 4
+DEFAULT_MAX_FRAME_SIZE = 32768
+
+FRAME_TYPE_COMMAND_NAME = 0x01
+FRAME_TYPE_COMMAND_ARGUMENT = 0x02
+FRAME_TYPE_COMMAND_DATA = 0x03
+
+FRAME_TYPES = {
+ b'command-name': FRAME_TYPE_COMMAND_NAME,
+ b'command-argument': FRAME_TYPE_COMMAND_ARGUMENT,
+ b'command-data': FRAME_TYPE_COMMAND_DATA,
+}
+
+FLAG_COMMAND_NAME_EOS = 0x01
+FLAG_COMMAND_NAME_HAVE_ARGS = 0x02
+FLAG_COMMAND_NAME_HAVE_DATA = 0x04
+
+FLAGS_COMMAND = {
+ b'eos': FLAG_COMMAND_NAME_EOS,
+ b'have-args': FLAG_COMMAND_NAME_HAVE_ARGS,
+ b'have-data': FLAG_COMMAND_NAME_HAVE_DATA,
+}
+
+FLAG_COMMAND_ARGUMENT_CONTINUATION = 0x01
+FLAG_COMMAND_ARGUMENT_EOA = 0x02
+
+FLAGS_COMMAND_ARGUMENT = {
+ b'continuation': FLAG_COMMAND_ARGUMENT_CONTINUATION,
+ b'eoa': FLAG_COMMAND_ARGUMENT_EOA,
+}
+
+FLAG_COMMAND_DATA_CONTINUATION = 0x01
+FLAG_COMMAND_DATA_EOS = 0x02
+
+FLAGS_COMMAND_DATA = {
+ b'continuation': FLAG_COMMAND_DATA_CONTINUATION,
+ b'eos': FLAG_COMMAND_DATA_EOS,
+}
+
+# Maps frame types to their available flags.
+FRAME_TYPE_FLAGS = {
+ FRAME_TYPE_COMMAND_NAME: FLAGS_COMMAND,
+ FRAME_TYPE_COMMAND_ARGUMENT: FLAGS_COMMAND_ARGUMENT,
+ FRAME_TYPE_COMMAND_DATA: FLAGS_COMMAND_DATA,
+}
+
+ARGUMENT_FRAME_HEADER = struct.Struct(r'<HH')
+
+def makeframe(frametype, frameflags, payload):
+ """Assemble a frame into a byte array."""
+ # TODO assert size of payload.
+ frame = bytearray(FRAME_HEADER_SIZE + len(payload))
+
+ l = struct.pack(r'<I', len(payload))
+ frame[0:3] = l[0:3]
+ frame[3] = (frametype << 4) | frameflags
+ frame[4:] = payload
+
+ return frame
+
+def makeframefromhumanstring(s):
+ """Given a string of the form: <type> <flags> <payload>, creates a frame.
+
+ This can be used by user-facing applications and tests for creating
+ frames easily without having to type out a bunch of constants.
+
+ Frame type and flags can be specified by integer or named constant.
+ Flags can be delimited by `|` to bitwise OR them together.
+ """
+ frametype, frameflags, payload = s.split(b' ', 2)
+
+ if frametype in FRAME_TYPES:
+ frametype = FRAME_TYPES[frametype]
+ else:
+ frametype = int(frametype)
+
+ finalflags = 0
+ validflags = FRAME_TYPE_FLAGS[frametype]
+ for flag in frameflags.split(b'|'):
+ if flag in validflags:
+ finalflags |= validflags[flag]
+ else:
+ finalflags |= int(flag)
+
+ payload = util.unescapestr(payload)
+
+ return makeframe(frametype, finalflags, payload)
+
+def createcommandframes(cmd, args, datafh=None):
+ """Create frames necessary to transmit a request to run a command.
+
+ This is a generator of bytearrays. Each item represents a frame
+ ready to be sent over the wire to a peer.
+ """
+ flags = 0
+ if args:
+ flags |= FLAG_COMMAND_NAME_HAVE_ARGS
+ if datafh:
+ flags |= FLAG_COMMAND_NAME_HAVE_DATA
+
+ if not flags:
+ flags |= FLAG_COMMAND_NAME_EOS
+
+ yield makeframe(FRAME_TYPE_COMMAND_NAME, flags, cmd)
+
+ for i, k in enumerate(sorted(args)):
+ v = args[k]
+ last = i == len(args) - 1
+
+ # TODO handle splitting of argument values across frames.
+ payload = bytearray(ARGUMENT_FRAME_HEADER.size + len(k) + len(v))
+ offset = 0
+ ARGUMENT_FRAME_HEADER.pack_into(payload, offset, len(k), len(v))
+ offset += ARGUMENT_FRAME_HEADER.size
+ payload[offset:offset + len(k)] = k
+ offset += len(k)
+ payload[offset:offset + len(v)] = v
+
+ flags = FLAG_COMMAND_ARGUMENT_EOA if last else 0
+ yield makeframe(FRAME_TYPE_COMMAND_ARGUMENT, flags, payload)
+
+ if datafh:
+ while True:
+ data = datafh.read(DEFAULT_MAX_FRAME_SIZE)
+
+ done = False
+ if len(data) == DEFAULT_MAX_FRAME_SIZE:
+ flags = FLAG_COMMAND_DATA_CONTINUATION
+ else:
+ flags = FLAG_COMMAND_DATA_EOS
+ assert datafh.read(1) == b''
+ done = True
+
+ yield makeframe(FRAME_TYPE_COMMAND_DATA, flags, data)
+
+ if done:
+ break
diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt
--- a/mercurial/help/internals/wireprotocol.txt
+++ b/mercurial/help/internals/wireprotocol.txt
@@ -187,12 +187,15 @@
Requests to unknown commands or URLS result in an HTTP 404.
TODO formally define response type, how error is communicated, etc.
-HTTP request and response bodies use the *TBD Protocol* for media exchange.
+HTTP request and response bodies use the *Unified Frame-Based Protocol*
+(defined below) for media exchange. The entirety of the HTTP message
+body is 0 or more frames as defined by this protocol.
Clients and servers MUST advertise the ``TBD`` media type via the
``Content-Type`` request and response headers. In addition, clients MUST
advertise this media type value in their ``Accept`` request header in all
requests.
+TODO finalize the media type. For now, it is defined in wireprotoserver.py.
Servers receiving requests without an ``Accept`` header SHOULD respond with
an HTTP 406.
@@ -429,7 +432,7 @@
SSH Version 2 Transport
-----------------------
-**Experimental**
+**Experimental and under development**
Version 2 of the SSH transport behaves identically to version 1 of the SSH
transport with the exception of handshake semantics. See above for how
@@ -451,6 +454,164 @@
Following capabilities advertisement, the peers communicate using version
1 of the SSH transport.
+Unified Frame-Based Protocol
+============================
+
+**Experimental and under development**
+
+The *Unified Frame-Based Protocol* is a communications protocol between
+Mercurial peers. The protocol aims to be mostly transport agnostic
+(works similarly on HTTP, SSH, etc).
+
+To operate the protocol, a bi-directional, half-duplex pipe supporting
+ordered sends and receives is required. That is, each peer has one pipe
+for sending data and another for receiving.
+
+The protocol is request-response based: the client issues requests to
+the server, which issues replies to those requests. Server-initiated
+messaging is not supported.
+
+All data is read and written in atomic units called *frames*. These
+are conceptually similar to TCP packets. Higher-level functionality
+is built on the exchange and processing of frames.
+
+Frames begin with a 4 octet header followed by a variable length
+payload::
+
+ +-----------------------------------------------+
+ | Length (24) |
+ +-----------+-----------------------------------+
+ | Type (4) |
+ +-----------+
+ | Flags (4) |
+ +===========+===================================================|
+ | Frame Payload (0...) ...
+ +---------------------------------------------------------------+
+
+The length of the frame payload is expressed as an unsigned 24 bit
+little endian integer. Values larger than 65535 MUST NOT be used unless
+given permission by the server as part of the negotiated capabilities
+during the handshake. The frame header is not part of the advertised
+frame length.
+
+The 4-bit ``Type`` field denotes the type of message being sent.
+
+The 4-bit ``Flags`` field defines special, per-type attributes for
+the frame.
+
+The sections below define the frame types and their behavior.
+
+Command Request (``0x01``)
+--------------------------
+
+This frame contains a request to run a command.
+
+The name of the command to run constitutes the entirety of the frame
+payload.
+
+This frame type MUST ONLY be sent from clients to servers: it is illegal
+for a server to send this frame to a client.
+
+The following flag values are defined for this type:
+
+0x01
+ End of command data. When set, the client will not send any command
+ arguments or additional command data. When set, the command has been
+ fully issued and the server has the full context to process the command.
+ The next frame issued by the client is not part of this command.
+0x02
+ Command argument frames expected. When set, the client will send
+ *Command Argument* frames containing command argument data.
+0x04
+ Command data frames expected. When set, the client will send
+ *Command Data* frames containing a raw stream of data for this
+ command.
+
+The ``0x01`` flag is mutually exclusive with both the ``0x02`` and ``0x04``
+flags.
+
+Command Argument (``0x02``)
+---------------------------
+
+This frame contains a named argument for a command.
+
+The frame type MUST ONLY be sent from clients to servers: it is illegal
+for a server to send this frame to a client.
+
+The payload consists of:
+
+* A 16-bit little endian integer denoting the length of the
+ argument name.
+* A 16-bit little endian integer denoting the length of the
+ argument value.
+* The argument name: ASCII data whose length is given by the first integer.
+* The argument value: binary data whose length is given by the second integer.
+
+The payload MUST hold the entirety of the 32-bit header and the
+argument name. The argument value MAY span multiple frames. If this
+occurs, the appropriate frame flag should be set to indicate this.
+
+The following flag values are defined for this type:
+
+0x01
+ Argument data continuation. When set, the data for this argument did
+ not fit in a single frame and the next frame will contain additional
+ argument data.
+
+0x02
+ End of arguments data. When set, the client will not send any more
+ command arguments for the command this frame is associated with.
+ The next frame issued by the client will be command data or
+ belong to a separate request.
+
+Command Data (``0x03``)
+-----------------------
+
+This frame contains raw data for a command.
+
+Most commands can be executed by specifying arguments. However,
+arguments have an upper bound to their length. For commands that
+accept data that is beyond this length or whose length isn't known
+when the command is initially sent, the client will need to stream
+arbitrary data to the server. This frame type facilitates the sending
+of this data.
+
+The payload of this frame type consists of a stream of raw data to be
+consumed by the command handler on the server. The format of the data
+is command specific.
+
+The following flag values are defined for this type:
+
+0x01
+ Command data continuation. When set, the data for this command
+ continues into a subsequent frame.
+
+0x02
+ End of data. When set, command data has been fully sent to the
+ server. The command has been fully issued and no new data for this
+ command will be sent. The next frame will belong to a new command.
+
+Issuing Commands
+----------------
+
+A client can request that a remote run a command by sending it
+frames defining that command. This logical stream is composed of
+1 ``Command Request`` frame, 0 or more ``Command Argument`` frames,
+and 0 or more ``Command Data`` frames.
+
+Argument frames are the recommended mechanism for transferring fixed
+sets of parameters to a command. Data frames are appropriate for
+transferring variable data. An analogy can be drawn with HTTP:
+argument frames are like headers and data frames are like the message body.
+
+It is recommended for servers to delay the dispatch of a command
+until all argument frames for that command have been received. Servers
+MAY impose limits on the maximum argument size.
+TODO define failure mechanism.
+
+Servers MAY dispatch to commands immediately once argument data
+is available or delay until command data is received in full.
+
Capabilities
============
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -78,6 +78,7 @@
url as urlmod,
util,
vfs as vfsmod,
+ wireprotoframing,
wireprotoserver,
)
from .utils import dateutil
@@ -2697,6 +2698,12 @@
The content of the file defined as the value to this argument will be
transferred verbatim as the HTTP request body.
+ ``frame <type> <flags> <payload>``
+ Send a unified protocol frame as part of the request body.
+
+ All frames will be collected and sent as the body to the HTTP
+ request.
+
close
-----
@@ -2736,6 +2743,28 @@
---------
``read()`` N bytes from the server's stderr pipe, if available.
+
+ Specifying Unified Frame-Based Protocol Frames
+ ----------------------------------------------
+
+ It is possible to emit *Unified Frame-Based Protocol* frames by using
+ special syntax.
+
+ A frame is composed of a type, flags, and a payload. These can be parsed
+ from a string of the form ``<type> <flags> <payload>``. That is, 3
+ space-delimited strings.
+
+ ``payload`` is the simplest: it is evaluated as a Python byte string
+ literal.
+
+ ``type`` can be an integer value for the frame type or the string name
+ of the type. The strings are defined in ``wireprotoframing.py``. e.g.
+ ``command-name``.
+
+ ``flags`` is a ``|`` delimited list of flag components. Each component
+ (and there can be just one) can be an integer or a flag name for the
+ specified frame type. Values are resolved to integers and then bitwise
+ OR'd together.
"""
opts = pycompat.byteskwargs(opts)
@@ -2939,6 +2968,7 @@
method, httppath = request[1:]
headers = {}
body = None
+ frames = []
for line in lines:
line = line.lstrip()
m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line)
@@ -2949,11 +2979,20 @@
if line.startswith(b'BODYFILE '):
with open(line.split(b' ', 1), 'rb') as fh:
body = fh.read()
+ elif line.startswith(b'frame '):
+ frame = wireprotoframing.makeframefromhumanstring(
+ line[len(b'frame '):])
+
+ frames.append(frame)
else:
raise error.Abort(_('unknown argument to httprequest: %s') %
line)
url = path + httppath
+
+ if frames:
+ body = b''.join(bytes(f) for f in frames)
+
req = urlmod.urlreq.request(pycompat.strurl(url), body, headers)
# urllib.Request insists on using has_data() as a proxy for
To: indygreg, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list