https://github.com/python/cpython/commit/b60557382828a094f9da525cbd71c154bb97378b
commit: b60557382828a094f9da525cbd71c154bb97378b
branch: main
author: Mike Edmunds <[email protected]>
committer: bitdancer <[email protected]>
date: 2026-05-01T14:00:46-04:00
summary:
gh-81074: Allow non-ASCII addr_spec in email.headerregistry.Address (#122477)
The email.headerregistry.Address constructor raised an error if
addr_spec contained a non-ASCII character. (But it fully supports
non-ASCII in the separate username and domain args.) This change
removes the error for a non-ASCII addr_spec, as well as the
Defect that triggered it. In the Unicode era, non-ASCII is not a
defect, though it is an error when an attempt is made to serialize
it to ASCII. The serialization issue was handled in #122540.
files:
A Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst
M Lib/email/_header_value_parser.py
M Lib/email/errors.py
M Lib/test/test_email/test__header_value_parser.py
M Lib/test/test_email/test_headerregistry.py
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 26b6e26ae652fa..9873958f5c2790 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1503,11 +1503,6 @@ def get_local_part(value):
local_part.defects.append(errors.ObsoleteHeaderDefect(
"local-part is not a dot-atom (contains CFWS)"))
local_part[0] = obs_local_part
- try:
- local_part.value.encode('ascii')
- except UnicodeEncodeError:
- local_part.defects.append(errors.NonASCIILocalPartDefect(
- "local-part contains non-ASCII characters)"))
return local_part, value
def get_obs_local_part(value):
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index 6bc744bd59c5bb..859307dd85be11 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -109,9 +109,9 @@ class ObsoleteHeaderDefect(HeaderDefect):
"""Header uses syntax declared obsolete by RFC 5322"""
class NonASCIILocalPartDefect(HeaderDefect):
- """local_part contains non-ASCII characters"""
- # This defect only occurs during unicode parsing, not when
- # parsing messages decoded from binary.
+ """Unused. Note: this error is deprecated and may be removed in the future."""
+ # RFC 6532 permits a non-ASCII local-part. _header_value_parser previously
+ # treated this as a parse-time defect (when parsing Unicode, but not bytes).
class InvalidDateDefect(HeaderDefect):
"""Header has unparsable or invalid date"""
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index bc698759614c36..aded44e85ee336 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -1235,17 +1235,6 @@ def test_get_local_part_valid_and_invalid_qp_in_atom_list(self):
'@example.com')
self.assertEqual(local_part.local_part, r'\example\\ example')
- def test_get_local_part_unicode_defect(self):
- # Currently this only happens when parsing unicode, not when parsing
- # stuff that was originally binary.
- local_part = self._test_get_x(parser.get_local_part,
- 'exá[email protected]',
- 'exámple',
- 'exámple',
- [errors.NonASCIILocalPartDefect],
- '@example.com')
- self.assertEqual(local_part.local_part, 'exámple')
-
# get_dtext
def test_get_dtext_only(self):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 2aaa7d68ca3fe1..aa918255d15c37 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1543,17 +1543,19 @@ def test_quoting(self):
self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>')
def test_il8n(self):
- a = Address('Éric', 'wok', 'exàmple.com')
+ a = Address('Éric', 'wők', 'exàmple.com')
self.assertEqual(a.display_name, 'Éric')
- self.assertEqual(a.username, 'wok')
+ self.assertEqual(a.username, 'wők')
self.assertEqual(a.domain, 'exàmple.com')
- self.assertEqual(a.addr_spec, 'wok@exàmple.com')
- self.assertEqual(str(a), 'Éric <wok@exàmple.com>')
+ self.assertEqual(a.addr_spec, 'wők@exàmple.com')
+ self.assertEqual(str(a), 'Éric <wők@exàmple.com>')
- # XXX: there is an API design issue that needs to be solved here.
- #def test_non_ascii_username_raises(self):
- # with self.assertRaises(ValueError):
- # Address('foo', 'wők', 'example.com')
+ def test_i18n_in_addr_spec(self):
+ a = Address(addr_spec='wők@exàmple.com')
+ self.assertEqual(a.username, 'wők')
+ self.assertEqual(a.domain, 'exàmple.com')
+ self.assertEqual(a.addr_spec, 'wők@exàmple.com')
+ self.assertEqual(str(a), 'wők@exàmple.com')
def test_crlf_in_constructor_args_raises(self):
cases = (
@@ -1574,10 +1576,6 @@ def test_crlf_in_constructor_args_raises(self):
with self.subTest(kwargs=kwargs),
self.assertRaisesRegex(ValueError, "invalid arguments"):
Address(**kwargs)
- def test_non_ascii_username_in_addr_spec_raises(self):
- with self.assertRaises(ValueError):
- Address('foo', addr_spec='wő[email protected]')
-
def test_address_addr_spec_and_username_raises(self):
with self.assertRaises(TypeError):
Address('foo', username='bing', addr_spec='bar@baz')
diff --git a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst
new file mode 100644
index 00000000000000..87de4fade14dfb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst
@@ -0,0 +1,8 @@
+The :mod:`email` module no longer treats email addresses with non-ASCII
+characters as defects when parsing a Unicode string or in the ``addr_spec``
+parameter to :class:`email.headerregistry.Address`. :rfc:`6532` permits such
+addresses, and they were already supported when parsing bytes and in the Address
+``username`` parameter.
+
+The (undocumented) :exc:`!email.errors.NonASCIILocalPartDefect` is no longer
+used and should be considered deprecated.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]