D2822: hgweb: support constructing URLs from an alternate base URL
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Mon Mar 12 21:38:35 UTC 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG219b23359f4c: hgweb: support constructing URLs from an alternate base URL (authored by indygreg).
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D2822?vs=6888&id=6952
REVISION DETAIL
https://phab.mercurial-scm.org/D2822
AFFECTED FILES
mercurial/hgweb/hgwebdir_mod.py
mercurial/hgweb/request.py
tests/test-wsgirequest.py
CHANGE DETAILS
diff --git a/tests/test-wsgirequest.py b/tests/test-wsgirequest.py
--- a/tests/test-wsgirequest.py
+++ b/tests/test-wsgirequest.py
@@ -23,11 +23,12 @@
r'wsgi.run_once': False,
}
-def parse(env, bodyfh=None, reponame=None, extra=None):
+def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None):
env = dict(env)
env.update(extra or {})
- return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame)
+ return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame,
+ altbaseurl=altbaseurl)
class ParseRequestTests(unittest.TestCase):
def testdefault(self):
@@ -242,6 +243,174 @@
self.assertEqual(r.dispatchpath, b'path1/path2')
self.assertEqual(r.reponame, b'prefix/repo')
+ def testaltbaseurl(self):
+ # Simple hostname remap.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver')
+
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # With a custom port.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver:8000')
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver:8000')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver:8000')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # With a changed protocol.
+ r = parse(DEFAULT_ENV, altbaseurl='https://altserver')
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'https://altserver')
+ self.assertEqual(r.advertisedbaseurl, b'https://altserver')
+ # URL scheme is defined as the actual scheme, not advertised.
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # An explicit default port (443) on an https:// alt URL is omitted from the advertised URLs.
+ r = parse(DEFAULT_ENV, altbaseurl='https://altserver:443')
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'https://altserver')
+ self.assertEqual(r.advertisedbaseurl, b'https://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # With only PATH_INFO defined.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
+ r'PATH_INFO': r'/path1/path2',
+ })
+ self.assertEqual(r.url, b'http://testserver/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertIsNone(r.reponame)
+
+ # Path on alt URL.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath')
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver/altpath')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'/altpath')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # With a trailing slash.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/')
+ self.assertEqual(r.url, b'http://testserver')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver/altpath/')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'/altpath/')
+ self.assertEqual(r.dispatchparts, [])
+ self.assertIsNone(r.dispatchpath)
+ self.assertIsNone(r.reponame)
+
+ # PATH_INFO + path on alt URL.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath', extra={
+ r'PATH_INFO': r'/path1/path2',
+ })
+ self.assertEqual(r.url, b'http://testserver/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl,
+ b'http://altserver/altpath/path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'/altpath')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertIsNone(r.reponame)
+
+ # PATH_INFO + path on alt URL with trailing slash.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/', extra={
+ r'PATH_INFO': r'/path1/path2',
+ })
+ self.assertEqual(r.url, b'http://testserver/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl,
+ b'http://altserver/altpath//path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'/altpath/')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertIsNone(r.reponame)
+
+ # Local SCRIPT_NAME is ignored.
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
+ r'SCRIPT_NAME': r'/script',
+ r'PATH_INFO': r'/path1/path2',
+ })
+ self.assertEqual(r.url, b'http://testserver/script/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertIsNone(r.reponame)
+
+ # Use remote's path for script name, app path
+ r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altroot', extra={
+ r'SCRIPT_NAME': r'/script',
+ r'PATH_INFO': r'/path1/path2',
+ })
+ self.assertEqual(r.url, b'http://testserver/script/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl,
+ b'http://altserver/altroot/path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.urlscheme, b'http')
+ self.assertEqual(r.apppath, b'/altroot')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertIsNone(r.reponame)
+
+ # reponame is factored in properly.
+ r = parse(DEFAULT_ENV, reponame=b'repo',
+ altbaseurl='http://altserver/altroot',
+ extra={
+ r'SCRIPT_NAME': r'/script',
+ r'PATH_INFO': r'/repo/path1/path2',
+ })
+
+ self.assertEqual(r.url, b'http://testserver/script/repo/path1/path2')
+ self.assertEqual(r.baseurl, b'http://testserver')
+ self.assertEqual(r.advertisedurl,
+ b'http://altserver/altroot/repo/path1/path2')
+ self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+ self.assertEqual(r.apppath, b'/altroot/repo')
+ self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+ self.assertEqual(r.dispatchpath, b'path1/path2')
+ self.assertEqual(r.reponame, b'repo')
+
if __name__ == '__main__':
import silenttestrunner
silenttestrunner.main(__name__)
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -157,7 +157,7 @@
# Request body input stream.
bodyfh = attr.ib()
-def parserequestfromenv(env, bodyfh, reponame=None):
+def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
"""Parse URL components from environment variables.
WSGI defines request attributes via environment variables. This function
@@ -167,8 +167,18 @@
string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
This simulates the world view of a WSGI application that processes
requests from the base URL of a repo.
+
+ If ``altbaseurl`` (typically taken from the ``web.baseurl`` config option)
+ is defined, it is used - instead of the WSGI environment variables - for
+ constructing URL components up to and including the WSGI application path.
+ For example, if the current WSGI application is at ``/repo`` and a request
+ is made to ``/rev/@`` with this argument set to
+ ``http://myserver:9000/prefix``, the URL and path components will resolve as
+ if the request were to ``http://myserver:9000/prefix/rev/@``. In other
+ words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
+ ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
"""
- # PEP-0333 defines the WSGI spec and is a useful reference for this code.
+ # PEP 3333 defines the WSGI spec and is a useful reference for this code.
# We first validate that the incoming object conforms with the WSGI spec.
# We only want to be dealing with spec-conforming WSGI implementations.
@@ -184,38 +194,67 @@
env = {k: v.encode('latin-1') if isinstance(v, str) else v
for k, v in env.iteritems()}
+ if altbaseurl:
+ altbaseurl = util.url(altbaseurl)
+
# https://www.python.org/dev/peps/pep-0333/#environ-variables defines
# the environment variables.
# https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
# how URLs are reconstructed.
fullurl = env['wsgi.url_scheme'] + '://'
- advertisedfullurl = fullurl
+
+ if altbaseurl and altbaseurl.scheme:
+ advertisedfullurl = altbaseurl.scheme + '://'
+ else:
+ advertisedfullurl = fullurl
- def addport(s):
- if env['wsgi.url_scheme'] == 'https':
- if env['SERVER_PORT'] != '443':
- s += ':' + env['SERVER_PORT']
+ def addport(s, port):
+ if s.startswith('https://'):
+ if port != '443':
+ s += ':' + port
else:
- if env['SERVER_PORT'] != '80':
- s += ':' + env['SERVER_PORT']
+ if port != '80':
+ s += ':' + port
return s
if env.get('HTTP_HOST'):
fullurl += env['HTTP_HOST']
else:
fullurl += env['SERVER_NAME']
- fullurl = addport(fullurl)
+ fullurl = addport(fullurl, env['SERVER_PORT'])
+
+ if altbaseurl and altbaseurl.host:
+ advertisedfullurl += altbaseurl.host
- advertisedfullurl += env['SERVER_NAME']
- advertisedfullurl = addport(advertisedfullurl)
+ if altbaseurl.port:
+ port = altbaseurl.port
+ elif altbaseurl.scheme == 'http' and not altbaseurl.port:
+ port = '80'
+ elif altbaseurl.scheme == 'https' and not altbaseurl.port:
+ port = '443'
+ else:
+ port = env['SERVER_PORT']
+
+ advertisedfullurl = addport(advertisedfullurl, port)
+ else:
+ advertisedfullurl += env['SERVER_NAME']
+ advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
baseurl = fullurl
advertisedbaseurl = advertisedfullurl
fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
- advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
+
+ if altbaseurl:
+ path = altbaseurl.path or ''
+ if path and not path.startswith('/'):
+ path = '/' + path
+ advertisedfullurl += util.urlreq.quote(path)
+ else:
+ advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
+
advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
if env.get('QUERY_STRING'):
@@ -226,7 +265,12 @@
# that represents the repository being dispatched to. When computing
# the dispatch info, we ignore these leading path components.
- apppath = env.get('SCRIPT_NAME', '')
+ if altbaseurl:
+ apppath = altbaseurl.path or ''
+ if apppath and not apppath.startswith('/'):
+ apppath = '/' + apppath
+ else:
+ apppath = env.get('SCRIPT_NAME', '')
if reponame:
repoprefix = '/' + reponame.strip('/')
@@ -545,7 +589,7 @@
instantiate instances of this class, which provides higher-level APIs
for obtaining request parameters, writing HTTP output, etc.
"""
- def __init__(self, wsgienv, start_response):
+ def __init__(self, wsgienv, start_response, altbaseurl=None):
version = wsgienv[r'wsgi.version']
if (version < (1, 0)) or (version >= (2, 0)):
raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
@@ -563,7 +607,7 @@
self.multiprocess = wsgienv[r'wsgi.multiprocess']
self.run_once = wsgienv[r'wsgi.run_once']
self.env = wsgienv
- self.req = parserequestfromenv(wsgienv, inp)
+ self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
self.res = wsgiresponse(self.req, start_response)
self._start_response = start_response
self.server_write = None
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -33,7 +33,6 @@
error,
hg,
profiling,
- pycompat,
scmutil,
templater,
ui as uimod,
@@ -83,33 +82,6 @@
yield (prefix + '/' +
util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
-def geturlcgivars(baseurl, port):
- """
- Extract CGI variables from baseurl
-
- >>> geturlcgivars(b"http://host.org/base", b"80")
- ('host.org', '80', '/base')
- >>> geturlcgivars(b"http://host.org:8000/base", b"80")
- ('host.org', '8000', '/base')
- >>> geturlcgivars(b'/base', 8000)
- ('', '8000', '/base')
- >>> geturlcgivars(b"base", b'8000')
- ('', '8000', '/base')
- >>> geturlcgivars(b"http://host", b'8000')
- ('host', '8000', '/')
- >>> geturlcgivars(b"http://host/", b'8000')
- ('host', '8000', '/')
- """
- u = util.url(baseurl)
- name = u.host or ''
- if u.port:
- port = u.port
- path = u.path or ""
- if not path.startswith('/'):
- path = '/' + path
-
- return name, pycompat.bytestr(port), path
-
def readallowed(ui, req):
"""Check allow_read and deny_read config options of a repo's ui object
to determine user permissions. By default, with neither option set (or
@@ -359,7 +331,6 @@
self.stripecount = self.ui.config('web', 'stripes')
if self.stripecount:
self.stripecount = int(self.stripecount)
- self._baseurl = self.ui.config('web', 'baseurl')
prefix = self.ui.config('web', 'prefix')
if prefix.startswith('/'):
prefix = prefix[1:]
@@ -376,7 +347,8 @@
wsgicgi.launch(self)
def __call__(self, env, respond):
- wsgireq = requestmod.wsgirequest(env, respond)
+ baseurl = self.ui.config('web', 'baseurl')
+ wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl)
return self.run_wsgi(wsgireq)
def run_wsgi(self, wsgireq):
@@ -455,7 +427,8 @@
# Re-parse the WSGI environment to take into account our
# repository path component.
wsgireq.req = requestmod.parserequestfromenv(
- wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo)
+ wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo,
+ altbaseurl=self.ui.config('web', 'baseurl'))
try:
# ensure caller gets private copy of ui
repo = hg.repository(self.ui.copy(), real)
@@ -502,7 +475,6 @@
for column in sortable]
self.refresh()
- self.updatereqenv(wsgireq.env)
entries = indexentries(self.ui, self.repos, wsgireq, req,
self.stripecount, sortcolumn=sortcolumn,
@@ -524,8 +496,6 @@
def config(section, name, default=uimod._unset, untrusted=True):
return self.ui.config(section, name, default, untrusted)
- self.updatereqenv(wsgireq.env)
-
url = wsgireq.env.get('SCRIPT_NAME', '')
if not url.endswith('/'):
url += '/'
@@ -557,10 +527,3 @@
}
tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
return tmpl
-
- def updatereqenv(self, env):
- if self._baseurl is not None:
- name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT'])
- env['SERVER_NAME'] = name
- env['SERVER_PORT'] = port
- env['SCRIPT_NAME'] = path
To: indygreg, #hg-reviewers, durin42
Cc: mercurial-devel
More information about the Mercurial-devel mailing list