Michele Orrù <maker...@gmail.com> added the comment:
The unittest.skip* helpers are decorators, so they are useless in this case;
also, as far as I can see, Lib/test/ uses sys.platform.
I would suggest putting a try statement in encodings.mbcs and raising an
error in case the modules it imports are not found.
But this is another story.
----------
title: The email package should defer to the codecs module for all aliases ->
The email package should defer to the codecs module for all aliases
Added file: http://bugs.python.org/file22065/issue8898_skip.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue8898>
_______________________________________
diff -r cc60d0283fad Lib/email/charset.py
--- a/Lib/email/charset.py Fri May 20 16:55:06 2011 +0200
+++ b/Lib/email/charset.py Sun May 22 14:18:05 2011 +0200
@@ -10,6 +10,7 @@
]
from functools import partial
+from codecs import lookup
import email.base64mime
import email.quoprimime
@@ -63,36 +64,6 @@
'utf-8': (SHORTEST, BASE64, 'utf-8'),
}
-# Aliases for other commonly-used names for character sets. Map
-# them to the real ones used in email.
-ALIASES = {
- 'latin_1': 'iso-8859-1',
- 'latin-1': 'iso-8859-1',
- 'latin_2': 'iso-8859-2',
- 'latin-2': 'iso-8859-2',
- 'latin_3': 'iso-8859-3',
- 'latin-3': 'iso-8859-3',
- 'latin_4': 'iso-8859-4',
- 'latin-4': 'iso-8859-4',
- 'latin_5': 'iso-8859-9',
- 'latin-5': 'iso-8859-9',
- 'latin_6': 'iso-8859-10',
- 'latin-6': 'iso-8859-10',
- 'latin_7': 'iso-8859-13',
- 'latin-7': 'iso-8859-13',
- 'latin_8': 'iso-8859-14',
- 'latin-8': 'iso-8859-14',
- 'latin_9': 'iso-8859-15',
- 'latin-9': 'iso-8859-15',
- 'latin_10':'iso-8859-16',
- 'latin-10':'iso-8859-16',
- 'cp949': 'ks_c_5601-1987',
- 'euc_jp': 'euc-jp',
- 'euc_kr': 'euc-kr',
- 'ascii': 'us-ascii',
- }
-
-
# Map charsets to their Unicode codec strings.
CODEC_MAP = {
'gb2312': 'eucgb2312_cn',
@@ -103,6 +74,8 @@
'us-ascii': None,
}
+# Aliases defined by the user
+ALIASES = dict()
# Convenience functions for extending the above mappings
@@ -220,9 +193,12 @@
input_charset = str(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
- input_charset = input_charset.lower()
- # Set the input charset after filtering through the aliases
- self.input_charset = ALIASES.get(input_charset, input_charset)
+ # Set the input charset after filtering through its aliases defined in
+ # codecs library
+ try:
+ self.input_charset = lookup(input_charset).name
+ except LookupError:
+ self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the
# charset_map dictionary. Try that first, but let the user override
# it.
diff -r cc60d0283fad Lib/encodings/aliases.py
--- a/Lib/encodings/aliases.py Fri May 20 16:55:06 2011 +0200
+++ b/Lib/encodings/aliases.py Sun May 22 14:18:05 2011 +0200
@@ -254,7 +254,7 @@
# hp_roman8 codec
'roman8' : 'hp_roman8',
'r8' : 'hp_roman8',
- 'csHPRoman8' : 'hp_roman8',
+ 'cshproman8' : 'hp_roman8',
# hz codec
'hzgb' : 'hz',
@@ -298,6 +298,7 @@
'iso_ir_157' : 'iso8859_10',
'l6' : 'iso8859_10',
'latin6' : 'iso8859_10',
+ 'latin_6' : 'iso8859_10',
# iso8859_11 codec
'thai' : 'iso8859_11',
@@ -308,6 +309,7 @@
'iso_8859_13' : 'iso8859_13',
'l7' : 'iso8859_13',
'latin7' : 'iso8859_13',
+ 'latin_7' : 'iso8859_13',
# iso8859_14 codec
'iso_8859_14' : 'iso8859_14',
@@ -316,11 +318,13 @@
'iso_ir_199' : 'iso8859_14',
'l8' : 'iso8859_14',
'latin8' : 'iso8859_14',
+ 'latin_8' : 'iso8859_14',
# iso8859_15 codec
'iso_8859_15' : 'iso8859_15',
'l9' : 'iso8859_15',
'latin9' : 'iso8859_15',
+ 'latin_9' : 'iso8859_15',
# iso8859_16 codec
'iso_8859_16' : 'iso8859_16',
@@ -328,6 +332,7 @@
'iso_ir_226' : 'iso8859_16',
'l10' : 'iso8859_16',
'latin10' : 'iso8859_16',
+ 'latin_10' : 'iso8859_16',
# iso8859_2 codec
'csisolatin2' : 'iso8859_2',
@@ -336,6 +341,7 @@
'iso_ir_101' : 'iso8859_2',
'l2' : 'iso8859_2',
'latin2' : 'iso8859_2',
+ 'latin_2' : 'iso8859_2',
# iso8859_3 codec
'csisolatin3' : 'iso8859_3',
@@ -344,6 +350,7 @@
'iso_ir_109' : 'iso8859_3',
'l3' : 'iso8859_3',
'latin3' : 'iso8859_3',
+ 'latin_3' : 'iso8859_3',
# iso8859_4 codec
'csisolatin4' : 'iso8859_4',
@@ -352,6 +359,7 @@
'iso_ir_110' : 'iso8859_4',
'l4' : 'iso8859_4',
'latin4' : 'iso8859_4',
+ 'latin_4' : 'iso8859_4',
# iso8859_5 codec
'csisolatincyrillic' : 'iso8859_5',
@@ -393,6 +401,7 @@
'iso_ir_148' : 'iso8859_9',
'l5' : 'iso8859_9',
'latin5' : 'iso8859_9',
+ 'latin_5' : 'iso8859_9',
# johab codec
'cp1361' : 'johab',
@@ -474,9 +483,6 @@
'sjisx0213' : 'shift_jisx0213',
's_jisx0213' : 'shift_jisx0213',
- # tactis codec
- 'tis260' : 'tactis',
-
# tis_620 codec
'tis620' : 'tis_620',
'tis_620_0' : 'tis_620',
diff -r cc60d0283fad Lib/test/test_codeccallbacks.py
--- a/Lib/test/test_codeccallbacks.py Fri May 20 16:55:06 2011 +0200
+++ b/Lib/test/test_codeccallbacks.py Sun May 22 14:18:05 2011 +0200
@@ -1,5 +1,13 @@
-import test.support, unittest
-import sys, codecs, html.entities, unicodedata
+import test.support
+import unittest
+
+from encodings.aliases import aliases
+import codecs
+import unicodedata
+import html.entities
+import importlib
+import sys
+
class PosReturn:
# this can be used for configurable callbacks
@@ -629,7 +637,16 @@
"test.badhandler"
)
- def test_lookup(self):
+ def test_lookup_aliases(self):
+ for alias, module_name in aliases.items():
+ if sys.platform != 'win32' and module_name == 'mbcs':
+ continue
+
+ module = importlib.import_module('encodings.' + module_name)
+ codec_name = module.getregentry().name
+ self.assertEqual(codecs.lookup(alias).name, codec_name)
+
+ def test_lookup_error(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@@ -664,7 +681,7 @@
self.assertRaises(TypeError, codecs.register_error, 42)
self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
- def test_badlookupcall(self):
+ def test_badlookup_errorcall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::lookup_error()
self.assertRaises(TypeError, codecs.lookup_error)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com