[PATCH 3 of 4 STABLE] i18n: use "encoding.lower()" to normalize specified string for revset

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Sun Dec 25 11:38:09 UTC 2011


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1324812916 -32400
# Branch stable
# Node ID 89cbee80b33f7fbc065b21d7f6d664b2e9669cda
# Parent  baccb82b6bc3ea34bd3ce8de939bb177bc8a4927
i18n: use "encoding.lower()" to normalize specified string for revset

some problematic encoding (e.g.: cp932) uses ASCII alphabet characters
in byte sequence of multi byte characters.

"str.lower()" on such byte sequence may treat distinct characters as
same one, and cause unexpected log matching.

this patch uses "encoding.lower()" instead of "str.lower()" to
normalize strings for compare.

diff -r baccb82b6bc3 -r 89cbee80b33f mercurial/revset.py
--- a/mercurial/revset.py	Sun Dec 25 20:35:16 2011 +0900
+++ b/mercurial/revset.py	Sun Dec 25 20:35:16 2011 +0900
@@ -11,6 +11,7 @@
 import bookmarks as bookmarksmod
 import match as matchmod
 from i18n import _
+import encoding
 
 elements = {
     "(": (20, ("group", 1, ")"), ("func", 1, ")")),
@@ -233,8 +234,8 @@
     Alias for ``user(string)``.
     """
     # i18n: "author" is a keyword
-    n = getstring(x, _("author requires a string")).lower()
-    return [r for r in subset if n in repo[r].user().lower()]
+    n = encoding.lower(getstring(x, _("author requires a string")))
+    return [r for r in subset if n in encoding.lower(repo[r].user())]
 
 def bisect(repo, subset, x):
     """``bisect(string)``
@@ -376,11 +377,11 @@
     Search commit message for string. The match is case-insensitive.
     """
     # i18n: "desc" is a keyword
-    ds = getstring(x, _("desc requires a string")).lower()
+    ds = encoding.lower(getstring(x, _("desc requires a string")))
     l = []
     for r in subset:
         c = repo[r]
-        if ds in c.description().lower():
+        if ds in encoding.lower(c.description()):
             l.append(r)
     return l
 
@@ -522,12 +523,12 @@
     string. The match is case-insensitive.
     """
     # i18n: "keyword" is a keyword
-    kw = getstring(x, _("keyword requires a string")).lower()
+    kw = encoding.lower(getstring(x, _("keyword requires a string")))
     l = []
     for r in subset:
         c = repo[r]
         t = " ".join(c.files() + [c.user(), c.description()])
-        if kw in t.lower():
+        if kw in encoding.lower(t):
             l.append(r)
     return l
 
diff -r baccb82b6bc3 -r 89cbee80b33f tests/test-revset.t
--- a/tests/test-revset.t	Sun Dec 25 20:35:16 2011 +0900
+++ b/tests/test-revset.t	Sun Dec 25 20:35:16 2011 +0900
@@ -473,3 +473,61 @@
   $ log 'max(1 or 2) and not 2'
   $ log 'min(1 or 2) and not 1'
   $ log 'last(1 or 2, 1) and not 2'
+
+  $ cd ..
+
+test author/desc/keyword in problematic encoding
+# unicode: cp932:
+# u30A2    0x83 0x41(= 'A')
+# u30C2    0x83 0x61(= 'a')
+
+  $ hg init problematicencoding
+  $ cd problematicencoding
+
+  $ python > setup.sh <<EOF
+  > print u'''
+  > echo a > text
+  > hg add text
+  > hg --encoding utf-8 commit -u '\u30A2' -m none
+  > echo b > text
+  > hg --encoding utf-8 commit -u '\u30C2' -m none
+  > echo c > text
+  > hg --encoding utf-8 commit -u none -m '\u30A2'
+  > echo d > text
+  > hg --encoding utf-8 commit -u none -m '\u30C2'
+  > '''.encode('utf-8')
+  > EOF
+  $ sh < setup.sh
+
+test in problematic encoding
+  $ python > test.sh <<EOF
+  > print u'''
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30C2)'
+  > '''.encode('cp932')
+  > EOF
+  $ sh < test.sh
+  0
+  ====
+  1
+  ====
+  2
+  ====
+  3
+  ====
+  0
+  2
+  ====
+  1
+  3
+
+  $ cd ..



More information about the Mercurial-devel mailing list