[PATCH 11 of 12] hgweb: use chunked encoding in responses

Mads Kiilerich mads at kiilerich.com
Fri Jan 11 23:32:55 UTC 2013


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1357947109 -3600
# Node ID d4104616205bc3d9fa665a6cae85bc702774987c
# Parent  d085fef69932009da56e66d46b75da5c885cf91a
hgweb: use chunked encoding in responses

hgweb closed the established http connections when the content length was
unknown. That caused extra connections to be created and thus some extra
connection overhead, extra ssl warnings and extra authentication steps.

By using 'Transfer-Encoding: chunked' when streaming a response with
unknown length it is possible to keep connections alive.

Responses where the content body (and thus its length) is unknown when sending
the header will use chunked encoding. The iterator corresponding to this
request must pass through .processiter() to make sure it gets the necessary
chunkencoding.

The changed hash in test-clone-cgi.t is caused by the chunked headers and
encoding.

diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -113,7 +113,7 @@
 
     def __call__(self, env, respond):
         req = wsgirequest(env, respond)
-        return self.run_wsgi(req)
+        return req.processiter(self.run_wsgi(req))
 
     def run_wsgi(self, req):
 
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -144,7 +144,7 @@
 
     def __call__(self, env, respond):
         req = wsgirequest(env, respond)
-        return self.run_wsgi(req)
+        return req.processiter(self.run_wsgi(req))
 
     def read_allowed(self, ui, req):
         """Check allow_read and deny_read config options of a repo's ui object
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -39,6 +39,13 @@
         form[k] = [i.strip() for i in v]
     return form
 
+def chunker(l):
+    """perform chunk encoding of strings in iter"""
+    for s in l:
+        if s:
+            yield '%x\r\n%s\r\n' % (len(s), s)
+    yield '0\r\n\r\n'
+
 class wsgirequest(object):
     def __init__(self, wsgienv, start_response):
         version = wsgienv['wsgi.version']
@@ -57,6 +64,7 @@
         self._start_response = start_response
         self.server_write = None
         self.headers = []
+        self._chunked = None
 
     def __iter__(self):
         return iter([])
@@ -79,7 +87,10 @@
                             .replace('\\', '\\\\').replace('"', '\\"'))
                 self.headers.append(('Content-Disposition',
                                      'inline; filename="%s"' % filename))
-            if body is not None:
+            self._chunked = body is None
+            if self._chunked:
+                self.headers.append(('Transfer-Encoding', 'chunked'))
+            else:
                 self.headers.append(('Content-Length', str(len(body))))
 
             for k, v in self.headers:
@@ -110,6 +121,8 @@
 
     def write(self, thing):
         if thing:
+            if self._chunked:
+                thing = '%x\r\n%s\r\n' % (len(thing), thing)
             try:
                 self.server_write(thing)
             except socket.error, inst:
@@ -126,6 +139,14 @@
     def close(self):
         return None
 
+    def processiter(self, l):
+        """process a WSGI-like response iterable. All responses must be passed
+        through this to ensure they are encoded correctly.
+        """
+        if self._chunked:
+            return chunker(l)
+        return l
+
 def wsgiapplication(app_maker):
     '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
     can and should now be used as a WSGI application.'''
diff --git a/tests/test-clone-cgi.t b/tests/test-clone-cgi.t
--- a/tests/test-clone-cgi.t
+++ b/tests/test-clone-cgi.t
@@ -28,4 +28,4 @@
   $ QUERY_STRING="cmd=changegroup&roots=0000000000000000000000000000000000000000"; export QUERY_STRING
   $ python hgweb.cgi >page1 2>&1
   $ python "$TESTDIR/md5sum.py" page1
-  1f424bb22ec05c3c6bc866b6e67efe43  page1
+  9a14c537fc7ede9a85f95978a0ddb8fc  page1
diff --git a/tests/test-hgweb-no-path-info.t b/tests/test-hgweb-no-path-info.t
--- a/tests/test-hgweb-no-path-info.t
+++ b/tests/test-hgweb-no-path-info.t
@@ -64,8 +64,9 @@
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'application/atom+xml; charset=ascii')]
+  [('Content-Type', 'application/atom+xml; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  34a\r (esc)
   <?xml version="1.0" encoding="ascii"?>
   <feed xmlns="http://www.w3.org/2005/Atom">
    <!-- Changelog -->
@@ -93,16 +94,23 @@
    </entry>
   
   </feed>
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'text/plain; charset=ascii')]
+  [('Content-Type', 'text/plain; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  8\r (esc)
   
   repo/
   
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
 
diff --git a/tests/test-hgweb-no-request-uri.t b/tests/test-hgweb-no-request-uri.t
--- a/tests/test-hgweb-no-request-uri.t
+++ b/tests/test-hgweb-no-request-uri.t
@@ -76,8 +76,9 @@
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'application/atom+xml; charset=ascii')]
+  [('Content-Type', 'application/atom+xml; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  34a\r (esc)
   <?xml version="1.0" encoding="ascii"?>
   <feed xmlns="http://www.w3.org/2005/Atom">
    <!-- Changelog -->
@@ -105,38 +106,53 @@
    </entry>
   
   </feed>
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'text/plain; charset=ascii')]
+  [('Content-Type', 'text/plain; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  14\r (esc)
   
   -rw-r--r-- 4 bar
   
   
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'text/plain; charset=ascii')]
+  [('Content-Type', 'text/plain; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  9\r (esc)
   
   /repo/
   
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'text/plain; charset=ascii')]
+  [('Content-Type', 'text/plain; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
+  14\r (esc)
   
   -rw-r--r-- 4 bar
   
   
+  \r (esc)
+  0\r (esc)
+  \r (esc)
   ---- ERRORS
   
 
diff --git a/tests/test-hgweb-non-interactive.t b/tests/test-hgweb-non-interactive.t
--- a/tests/test-hgweb-non-interactive.t
+++ b/tests/test-hgweb-non-interactive.t
@@ -70,7 +70,7 @@
   ---- STATUS
   200 Script output follows
   ---- HEADERS
-  [('Content-Type', 'text/html; charset=ascii')]
+  [('Content-Type', 'text/html; charset=ascii'), ('Transfer-Encoding', 'chunked')]
   ---- DATA
   ---- ERRORS
   



More information about the Mercurial-devel mailing list