Demian Brecht added the comment:
I've updated the patch to include the latin-1 charset in legal header values.
It still uses a space as delimiter, but all other comments should now be
addressed.
----------
Added file: http://bugs.python.org/file38158/issue22928_2.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue22928>
_______________________________________
diff -r d12c7938c4b0 Doc/library/http.client.rst
--- a/Doc/library/http.client.rst Mon Feb 16 13:33:32 2015 +0200
+++ b/Doc/library/http.client.rst Tue Feb 17 07:55:09 2015 -0800
@@ -292,10 +292,10 @@
.. method:: HTTPConnection.putheader(header, argument[, ...])
- Send an :rfc:`822`\ -style header to the server. It sends a line to the server
- consisting of the header, a colon and a space, and the first argument. If more
- arguments are given, continuation lines are sent, each consisting of a tab and
- an argument.
+ Send an :rfc:`7230`\ -style header to the server. It sends a line to the server
+ consisting of the header, a colon and a space, and the first argument. If
+ more arguments are given, they are appended to the header value, each
+ prepended with a single space.
.. method:: HTTPConnection.endheaders(message_body=None)
diff -r d12c7938c4b0 Lib/http/client.py
--- a/Lib/http/client.py Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/http/client.py Tue Feb 17 07:55:09 2015 -0800
@@ -71,6 +71,7 @@
import http
import io
import os
+import re
import socket
import collections
from urllib.parse import urlsplit
@@ -87,6 +88,7 @@
_UNKNOWN = 'UNKNOWN'
+
# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
@@ -107,6 +109,36 @@
_MAXLINE = 65536
_MAXHEADERS = 100
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR = %x21-7E
+# obs-text = %x80-FF
+# header-field = field-name ":" OWS field-value OWS
+# field-name = token
+# field-value = *( field-content / obs-fold )
+# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar = VCHAR / obs-text
+#
+# obs-fold = CRLF 1*( SP / HTAB )
+# ; obsolete line folding
+# ; see Section 3.2.4
+
+# token = 1*tchar
+#
+# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+# / DIGIT / ALPHA
+# ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+# field-name is an RFC 7230 token.  "-" is escaped so that "+-." is not read
+# as a character range (which would also admit ","), the upper-case range
+# ends at "Z" (ending at "z" would also admit "[", "\" and "]"), and \Z is
+# used instead of $ because $ also matches just before a trailing newline.
+_HEADER_LEGAL_NAME = re.compile(rb"\A[!#$%&'*+\-.^_`|~a-zA-Z0-9]+\Z")
+# 0x20 (sp) is included in the valid character range for backwards
+# compatibility, where header values with spaces (i.e. auth headers) are passed
+# through to putheader as single values. latin-1 charset is also legal as ASCII
+# is only suggested in RFC 7230.  Only SP / HTAB are accepted as surrounding
+# whitespace: \s would also match CR and LF, and $ would match before a
+# trailing LF, either of which would let a header be injected.
+_HEADER_LEGAL_VALUE = re.compile(rb'\A[ \t]*[\x20-\x7E\xA0-\xFF]*[ \t]*\Z')
+
class HTTPMessage(email.message.Message):
# XXX The only usage of this method is in
@@ -1000,13 +1032,35 @@
if hasattr(header, 'encode'):
header = header.encode('ascii')
+
+ if not _HEADER_LEGAL_NAME.match(header):
+ raise ValueError('Invalid header name {!r}'.format(header))
+
values = list(values)
for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'):
- values[i] = one_value.encode('latin-1')
+ encoded_value = one_value.encode('latin-1')
elif isinstance(one_value, int):
- values[i] = str(one_value).encode('ascii')
- value = b'\r\n\t'.join(values)
+ encoded_value = str(one_value).encode('ascii')
+ else:
+ encoded_value = one_value
+
+ # Newly defined header fields SHOULD limit their field values to
+ # US-ASCII octets. A recipient SHOULD treat other octets in field
+ # content (obs-text) as opaque data.
+ if not _HEADER_LEGAL_VALUE.match(encoded_value):
+ raise ValueError(
+ 'Invalid header value {}'.format(encoded_value))
+
+ values[i] = encoded_value
+
+ # http://tools.ietf.org/html/rfc7230#section-3.2.4 states that line
+ # folding is obsolete, unless message/http MIME type is used and rules
+ # are conformed to. otherwise, spaces should be used. it might be a
+ # good idea to put validation for this rule in sometime in the future.
+ # as it currently stands, there's no way to determine the MIME type of
+ # the message at this point.
+ value = b' '.join(values)
header = header + b': ' + value
self._output(header)
diff -r d12c7938c4b0 Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/test/test_httplib.py Tue Feb 17 07:55:09 2015 -0800
@@ -171,6 +171,17 @@
conn.putheader('Content-length', 42)
self.assertIn(b'Content-length: 42', conn._buffer)
+ conn.putheader('Foo', ' bar ')
+ self.assertIn(b'Foo: bar ', conn._buffer)
+ conn.putheader('Bar', '\tbaz\t')
+ self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+ conn.putheader('Authorization', 'Bearer mytoken')
+ self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
+ conn.putheader('IterHeader', 'IterA', 'IterB')
+ self.assertIn(b'IterHeader: IterA IterB', conn._buffer)
+ conn.putheader('LatinHeader', b'\xFF')
+ self.assertIn(b'LatinHeader: \xFF', conn._buffer)
+
def test_ipv6host_header(self):
# Default host header on IPv6 transaction should wrapped by [] if
# its actual IPv6 address
@@ -200,6 +211,22 @@
self.assertEqual(resp.getheader('First'), 'val')
self.assertEqual(resp.getheader('Second'), 'val')
+ def test_invalid_control_characters(self):
+ # putheader() must raise ValueError for illegal header names and for
+ # header values that contain a line break.
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket('')
+ conn.putrequest('GET', '/')
+
+ # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no
+ # longer allowed in header names
+ self.assertRaises(
+ ValueError, conn.putheader, b' InvalidName ', b'foo')
+ # a non-ASCII byte is not a valid token character in a header name
+ self.assertRaises(
+ ValueError, conn.putheader, b'Invalid\x80Name', b'foo')
+ # issue22928: a LF embedded in a value would start an injected header line
+ self.assertRaises(
+ ValueError, conn.putheader, b'User-agent',
+ 'Mozilla/5.0' + chr(0x0A) + 'Location: header injection')
+
class BasicTest(TestCase):
def test_status_lines(self):
diff -r d12c7938c4b0 Misc/NEWS
--- a/Misc/NEWS Mon Feb 16 13:33:32 2015 +0200
+++ b/Misc/NEWS Tue Feb 17 07:55:09 2015 -0800
@@ -44,6 +44,9 @@
argument which, if set to True, will pass messages to handlers taking handler
levels into account.
+- Issue #22928: HTTPConnection.putheader now rejects header names and values
+ that are illegal per RFC 7230 with ValueError, preventing header injection.
+
Build
-----
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com