Unicode support for non-unicode locales
tailgunner at smtp.ru
tailgunner at smtp.ru
Mon Oct 8 10:13:29 UTC 2007
Currently, Mercurial lacks the ability to convert file names to
Unicode when working in non-Unicode locales. For example, file names
that were added and committed in cp-1251 (the Russian Windows code page)
can't be checked out correctly in koi8-r (a Russian Unix encoding) or
UTF-8: they are checked out as cp-1251 bytes, which is wrong.
This patch attempts to fix this by keeping file names in UTF-8
on disk and converting them to the local encoding for hg's internal use.
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -172,7 +172,7 @@ class changelog(revlog):
extra = self.decode_extra(extra)
if not extra.get('branch'):
extra['branch'] = 'default'
- files = l[3:]
+ files = map(util.tolocal, l[3:])
return (manifest, user, (time, timezone), files, desc,
extra)
def read(self, node):
@@ -193,6 +193,7 @@ class changelog(revlog):
extra = self.encode_extra(extra)
parseddate = "%s %s" % (parseddate, extra)
list.sort()
+ list = map(util.fromlocal, list)
l = [hex(manifest), user, parseddate] + list + ["", desc]
text = "\n".join(l)
return self.addrevision(text, transaction, self.count(), p1,
p2)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -421,7 +421,7 @@ class localrepository(repo.repository):
def file(self, f):
if f[0] == '/':
f = f[1:]
- return filelog.filelog(self.sopener, f)
+ return filelog.filelog(self.sopener, util.fromlocal(f))
def changectx(self, changeid=None):
return context.changectx(self, changeid)
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -40,6 +40,19 @@ class manifest(revlog):
mfdict = manifestdict()
for l in lines.splitlines():
f, n = l.split('\0')
+ def should_recode(s):
+ "True if UTF-8 string s should be recoded to local
charset"
+ # XXX this function could use util._encoding
+ # XXX to avoid calling decode
+ try:
+ s.decode("ascii")
+ except:
+ ret = True # not ASCII, should recode
+ else:
+ ret = False # ASCII, no recoding needed
+ return ret
+ if should_recode(f):
+ f = util.tolocal(f)
if len(n) > 40:
mfdict._flags[f] = n[40:]
mfdict[f] = bin(n[:40])
@@ -103,6 +116,7 @@ class manifest(revlog):
def find(self, node, f):
'''look up entry for a single file efficiently.
return (node, flags) pair if found, (None, None) if not.'''
+ f = util.fromlocal(f)
if self.mapcache and node == self.mapcache[0]:
return self.mapcache[1].get(f),
self.mapcache[1].flags(f)
text = self.revision(node)
@@ -136,6 +150,12 @@ class manifest(revlog):
if '\n' in f or '\r' in f:
raise RevlogError(_("'\\n' and '\\r' disallowed in
filenames"))
+ t = manifestdict()
+ for k in map.keys():
+ fname = util.fromlocal(k)
+ t[fname] = map[k]
+ t.set(fname, map.execf(k), map.linkf(k))
+ map = t
# if we're using the listcache, make sure it is valid and
# parented by the same node we're diffing against
if not (changed and self.listcache and p1 and
self.mapcache[0] == p1):
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-devel/attachments/20071008/0479134f/attachment.html>
More information about the Mercurial-devel
mailing list