[PATCH 1 of 8] use UTF-8 to encode/decode log text
Andrey
grooz-work at gorodok.net
Mon Nov 20 17:43:04 UTC 2006
On 20 November 2006 (Mon) 22:55, Matt Mackall wrote:
> This is still going to throw exceptions on existing repos, right?
> That's absolutely not acceptable.
>
> Again, this should go in util and be robust. And don't bother with
> making a CHANGELOG_ENCODING variable, please.
What about this?
Please note that the safe_decode function would ideally use ui.encodings['default']
instead of locale.getpreferredencoding(), but passing a ui object to this
function every time is clearly unacceptable. That is why I suggested moving all
config-related stuff from ui into a separate module. :)
# HG changeset patch
# User Andrey <grooz-work at gorodok.net>
# Date 1164043562 -21600
# Node ID 360befe49f4979f6ec8b2988c2884feac3eea2ed
# Parent 1dba5b1038d2c5d9bf494dcf64508c2c5047ef78
added safe_decode function
diff -r 1dba5b1038d2 -r 360befe49f49 mercurial/util.py
--- a/mercurial/util.py Tue Nov 14 10:29:30 2006 +0600
+++ b/mercurial/util.py Mon Nov 20 23:26:02 2006 +0600
@@ -15,7 +15,7 @@ from i18n import gettext as _
from i18n import gettext as _
from demandload import *
demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
-demandload(globals(), "os threading time calendar ConfigParser")
+demandload(globals(), "os threading time calendar ConfigParser locale")
# used by parsedate
defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
@@ -1083,3 +1083,13 @@ def drop_scheme(scheme, path):
if path.startswith('//'):
path = path[2:]
return path
+
+def safe_decode(text, encoding):
+ try:
+ return unicode(text, encoding)
+ except UnicodeDecodeError:
+ try:
+ return unicode(text, locale.getpreferredencoding())
+ except UnicodeDecodeError:
+ return unicode(text, 'ISO-8859-1') # can't fail
+
# HG changeset patch
# User Andrey <grooz-work at gorodok.net>
# Date 1164043746 -21600
# Node ID 49ae4f2cfc6fc5c4f54040bc321116dfeff27410
# Parent 360befe49f4979f6ec8b2988c2884feac3eea2ed
use UTF-8 to encode/decode log text
diff -r 360befe49f49 -r 49ae4f2cfc6f mercurial/changelog.py
--- a/mercurial/changelog.py Mon Nov 20 23:26:02 2006 +0600
+++ b/mercurial/changelog.py Mon Nov 20 23:29:06 2006 +0600
@@ -60,6 +60,7 @@ class changelog(revlog):
"""
if not text:
return (nullid, "", (0, 0), [], "", {})
+ text = util.safe_decode(text, 'UTF-8')
last = text.index("\n\n")
desc = text[last + 2:]
l = text[:last].split('\n')
@@ -98,4 +99,4 @@ class changelog(revlog):
list.sort()
l = [hex(manifest), user, parseddate] + list + ["", desc]
text = "\n".join(l)
- return self.addrevision(text, transaction, self.count(), p1, p2)
+ return self.addrevision(text.encode('UTF-8'), transaction, self.count(), p1, p2)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: changelog_utf8.diff
Type: text/x-diff
Size: 1011 bytes
Desc: not available
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-devel/attachments/20061120/d38f42da/attachment.diff>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: safe_decode.diff
Type: text/x-diff
Size: 1220 bytes
Desc: not available
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-devel/attachments/20061120/d38f42da/attachment-0001.diff>
More information about the Mercurial-devel
mailing list