[PATCH v6] cmdutil: add within-line color diff capacity
Yuya Nishihara
yuya at tcha.org
Sun Dec 3 08:30:04 UTC 2017
On Sat, 02 Dec 2017 21:51:33 +0900, matthieu.laneuville at octobus.net wrote:
> # HG changeset patch
> # User Matthieu Laneuville <matthieu.laneuville at octobus.net>
> # Date 1508944418 -32400
> # Thu Oct 26 00:13:38 2017 +0900
> # Node ID 2cd53502aa3a206eed18d1c13ee294b418fb43c5
> # Parent 3da4bd50103daab5419fc796d0c62470bab6da03
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2cd53502aa3a
> # EXP-Topic inline-diff
> cmdutil: add within-line color diff capacity
Looks nicer than the previous patch, but found a couple of bugs.
> diff -r 3da4bd50103d -r 2cd53502aa3a mercurial/patch.py
> --- a/mercurial/patch.py Wed Nov 29 07:57:17 2017 +0530
> +++ b/mercurial/patch.py Thu Oct 26 00:13:38 2017 +0900
> @@ -10,6 +10,7 @@ from __future__ import absolute_import,
>
> import collections
> import copy
> +import difflib
> import email
> import errno
> import hashlib
> @@ -2461,6 +2462,12 @@ def diffhunks(repo, node1=None, node2=No
>
> def difflabel(func, *args, **kw):
> '''yields 2-tuples of (output, label) based on the output of func()'''
> + inlinecolor = False
> + for arg in args:
> + if util.safehasattr(arg, 'ui'):
> + inlinecolor = arg.ui.configbool("experimental", "inline-color-diff")
> + break
This is ugly. Perhaps the worddiff option can be carried by the opts
argument. See patch.difffeatureopts() for details.
> @@ -2477,6 +2484,7 @@ def difflabel(func, *args, **kw):
> head = False
> for chunk in func(*args, **kw):
> lines = chunk.split('\n')
> + matches = [-1 for x in range(len(lines) + 1)]
> for i, line in enumerate(lines):
> if i != 0:
> yield ('\n', '')
> @@ -2504,7 +2512,12 @@ def difflabel(func, *args, **kw):
> if '\t' == token[0]:
> yield (token, 'diff.tab')
> else:
> - yield (token, label)
> + if not inlinecolor:
> + yield (token, label)
> + else:
> + buff, matches = _worddiff(lines, i, matches)
> + for (color, word) in buff:
> + yield (word, color)
> else:
> yield (stripline, label)
> break
> @@ -2513,6 +2526,63 @@ def difflabel(func, *args, **kw):
> if line != stripline:
> yield (line[len(stripline):], 'diff.trailingwhitespace')
>
> +def _worddiff(slist, idx, matches):
> + '''Find match of a given string in current chunk and performs word diff.'''
> + operation = 'inserted' if slist[idx][0] == '+' else 'deleted'
> + bound = matches[-1] # last item in matches stores the id of the last match
> +
> + # inserted lines should only be compared to lines that matched them before
> + if operation == 'inserted':
> + if matches[idx] != -1:
> + return _inlinediff(slist[idx],
> + slist[matches[idx]],
> + operation), matches
> + else:
> + return [('diff.' + operation, slist[idx])], matches
a) Needs to return a stripline, not a line.
> + # deleted lines first need to be matched
> + for i, line in enumerate(slist[bound + 1:-1]):
> + if line == '':
> + continue
> + if line[0] == '+':
> + sim = difflib.SequenceMatcher(None, slist[idx], line).ratio()
> + if sim > 0.7:
> + matches[i + bound + 1] = idx
> + matches[-1] = i + bound + 1
> + return _inlinediff(slist[idx], line, operation), matches
> + return [('diff.' + operation, slist[idx])], matches
b) We need to limit the search space only to the next inserted lines.
Perhaps (a) and (b) can be simply fixed by 2-pass scanning, in which build
a map of matched lines by 1st pass, and apply _inlinediff() by 2nd pass.
lines = chunk.split('\n')
matches = {}
if inlinecolor:
# build {deleted/inserted: inserted/deleted} map by looking @, -, + lines
matches = _findmatches(lines)
for i, line in enumerate(lines):
...
elif i in matches:
for w, l in _inlinediff(token, strip(lines[matches[i]])):
yield (w, l)
else:
yield (stripline, label)
> +def _inlinediff(s1, s2, operation):
> + '''Perform string diff to highlight specific changes.'''
> + operation_skip = '+?' if operation == 'deleted' else '-?'
> + if operation == 'deleted':
> + s2, s1 = s1, s2
> +
> + # buffer required to remove last space, there may be smarter ways to do this
> + buff = []
> +
> + # we never want to higlight the leading +-
> + if operation == 'deleted' and s2.startswith('-'):
> + buff.append(('diff.deleted', '-'))
> + s2 = s2[1:]
> + s1 = s1[1:]
> + elif operation == 'inserted' and s1.startswith('+'):
> + buff.append(('diff.inserted', '+'))
> + s2 = s2[1:]
> + s1 = s1[1:]
> +
> + s = difflib.ndiff(re.split(r'(\W)', s2), re.split(r'(\W)', s1))
^^
Nit: please use br'' for Python 3 compatibility.
> + for line in s:
^^^^
Nit: the variable name "line" is confusing.
> + if line[0] in operation_skip:
> + continue
> + l = 'diff.' + operation + '.highlight'
> + if line[0] in ' ': # unchanged parts
> + l = 'diff.' + operation
> + buff.append((l, line[2:]))
> +
> + buff[-1] = (buff[-1][0], buff[-1][1].strip(' '))
> + return buff
Maybe we should compact contiguous words of the same label, but that can be
addressed by a follow-up patch.
More information about the Mercurial-devel
mailing list