[PATCH 1 of 4] mailutil: module for handling non-ascii chars in mails
Christian Ebert
blacktrash at gmx.net
Wed Mar 5 12:03:10 UTC 2008
# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1204716940 -3600
# Node ID ccaf1af3177b687af56f9ba56300dc573883ea44
# Parent b193a6e591319726e8ccd0b4322f9b4785f59390
mailutil: module for handling non-ascii chars in mails
- methods to encode headers
- method to create mime text object with proper charset
diff --git a/hgext/mailutil.py b/hgext/mailutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/mailutil.py
@@ -0,0 +1,82 @@
+from mercurial import util
+from mercurial.i18n import _
+import email.Charset, email.Header, email.MIMEText, email.Utils
+
+def isutf(s, cs='utf-8'):
+ '''Returns True if s decodes cleanly with charset cs (default utf-8),
+ False on UnicodeDecodeError. Pass cs='ascii' to probe for pure ascii.'''
+ try:
+ s.decode(cs)
+ return True
+ except UnicodeDecodeError:
+ return False
+
+class converter(email.Charset.Charset):
+ '''Charset subclass that encodes mail headers, addresses and text
+ bodies containing non-ascii chars. defcharsets is the default for
+ the email.sendcharsets config list; utf-8 is always appended last.'''
+ defcharsets = ['iso-8859-1', 'iso-8859-15', 'windows-1252']
+
+ def __init__(self, ui):
+ email.Charset.Charset.__init__(self, util._encoding)  # input charset
+ self.ui = ui
+ sendcharsets = self.ui.configlist('email', 'sendcharsets',
+ default=self.defcharsets)
+ # drop ascii and utf-8: encode() probes ascii first, utf-8 is re-added below
+ self.sendcharsets = [cs for cs in sendcharsets
+ if cs not in ('ascii', 'us-ascii', 'utf-8')]
+ # ensure utf-8 is the last charset tried
+ self.sendcharsets.append('utf-8')
+
+ def encode(self, s):
+ '''Returns s unchanged if it is pure ascii, else s converted to the first
+ charset in sendcharsets that works. Sets self.output_codec to match.'''
+ if isutf(s, 'ascii'):
+ self.output_codec = 'us-ascii'
+ return s
+ for cs in self.sendcharsets:
+ try:
+ self.output_codec = cs
+ return self.convert(s)
+ except UnicodeEncodeError:
+ pass
+ except UnicodeDecodeError:
+ # s could not be decoded: broken input,
+ # or manually set HGENCODING incompatible with locale
+ self.output_codec = 'us-ascii'
+ return s.decode('us-ascii', 'replace')  # NOTE(review): decoded result, unlike other paths - confirm
+ except LookupError:
+ self.ui.warn(_('skipping invalid sendcharset: %s\n') % cs)
+ # last exit: no sendcharset succeeded, fall back to util._encoding
+ self.output_codec = util._encoding
+ return util.tolocal(s)
+
+ def headencode(self, s):
+ '''Returns RFC-2047 compliant header string for s, using the charset picked by encode().'''
+ # split into words?
+ s = self.encode(s)
+ return str(email.Header.Header(s, self.output_codec))
+
+ def addressencode(self, address):
+ '''Returns address as RFC-2047 compliant header (empty string if address is empty); may raise util.Abort.'''
+ if not address:
+ return ''
+ name, addr = email.Utils.parseaddr(address)
+ name = self.headencode(name)
+ try:
+ acc, dom = addr.split('@')
+ acc = acc.encode('ascii')
+ dom = dom.encode('idna')
+ addr = '%s@%s' % (acc, dom)
+ except UnicodeDecodeError:
+ raise util.Abort(_('invalid email address: %s') % addr)
+ except ValueError:
+ # addr did not split into exactly acc@dom; too strict for local user names?
+ addr = addr.encode('ascii')  # NOTE(review): an error raised here is not turned into util.Abort - confirm
+ return email.Utils.formataddr((name, addr))
+
+ def mimeencode(self, s):
+ '''Returns a plain-text MIMEText object for s, encoded via encode(),
+ with its charset set to the resulting self.output_codec.'''
+ s = self.encode(s)
+ return email.MIMEText.MIMEText(s, 'plain', self.output_codec)
More information about the Mercurial-devel
mailing list