Demian Brecht added the comment:

I’ve updated the patch to include the latin-1 charset in legal header values. 
It still uses a space as delimiter, but all other comments should now be 
addressed.

----------
Added file: http://bugs.python.org/file38158/issue22928_2.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue22928>
_______________________________________
diff -r d12c7938c4b0 Doc/library/http.client.rst
--- a/Doc/library/http.client.rst       Mon Feb 16 13:33:32 2015 +0200
+++ b/Doc/library/http.client.rst       Tue Feb 17 07:55:09 2015 -0800
@@ -292,10 +292,10 @@
 
 .. method:: HTTPConnection.putheader(header, argument[, ...])
 
-   Send an :rfc:`822`\ -style header to the server.  It sends a line to the server
-   consisting of the header, a colon and a space, and the first argument.  If more
-   arguments are given, continuation lines are sent, each consisting of a tab and
-   an argument.
+   Send an :rfc:`7230`\ -style header to the server.  It sends a line to the server
+   consisting of the header, a colon and a space, and the first argument. If
+   more arguments are given, they are appended to the header value, each
+   prepended with a single space.
 
 
 .. method:: HTTPConnection.endheaders(message_body=None)
diff -r d12c7938c4b0 Lib/http/client.py
--- a/Lib/http/client.py        Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/http/client.py        Tue Feb 17 07:55:09 2015 -0800
@@ -71,6 +71,7 @@
 import http
 import io
 import os
+import re
 import socket
 import collections
 from urllib.parse import urlsplit
@@ -87,6 +88,7 @@
 
 _UNKNOWN = 'UNKNOWN'
 
+
 # connection states
 _CS_IDLE = 'Idle'
 _CS_REQ_STARTED = 'Request-started'
@@ -107,6 +109,36 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR          = %x21-7E
+# obs-text       = %x80-FF
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+#
+# obs-fold       = CRLF 1*( SP / HTAB )
+#                ; obsolete line folding
+#                ; see Section 3.2.4
+
+# token          = 1*tchar
+#
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+_HEADER_LEGAL_NAME = re.compile(rb"^[!#$%&'*+\-.^_`|~a-zA-Z0-9]+\Z")
+# SP is legal inside values for backwards compatibility (e.g. auth headers
+# passed as a single value); latin-1 is legal as RFC 7230 only suggests ASCII.
+# Only SP/HTAB may pad a value and \Z (not $) anchors the end, so CR/LF can
+# never slip through and inject extra header lines.
+_HEADER_LEGAL_VALUE = re.compile(rb'^[ \t]*[\x20-\x7E\xA0-\xFF]*[ \t]*\Z')
+
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -1000,13 +1032,35 @@
 
         if hasattr(header, 'encode'):
             header = header.encode('ascii')
+
+        if not _HEADER_LEGAL_NAME.match(header):
+            raise ValueError('Invalid header name {!r}'.format(header))
+
         values = list(values)
         for i, one_value in enumerate(values):
             if hasattr(one_value, 'encode'):
-                values[i] = one_value.encode('latin-1')
+                encoded_value = one_value.encode('latin-1')
             elif isinstance(one_value, int):
-                values[i] = str(one_value).encode('ascii')
-        value = b'\r\n\t'.join(values)
+                encoded_value = str(one_value).encode('ascii')
+            else:
+                encoded_value = one_value
+
+            # Newly defined header fields SHOULD limit their field values to
+            # US-ASCII octets. A recipient SHOULD treat other octets in field
+            # content (obs-text) as opaque data.
+            if not _HEADER_LEGAL_VALUE.match(encoded_value):
+                raise ValueError(
+                    'Invalid header value {}'.format(encoded_value))
+
+            values[i] = encoded_value
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4 states that line
+        # folding is obsolete unless the message/http media type is used and
+        # its rules are followed; otherwise, spaces should be used. It might
+        # be a good idea to add validation for this rule in the future. As it
+        # currently stands, there's no way to determine the media type of the
+        # message at this point.
+        value = b' '.join(values)
         header = header + b': ' + value
         self._output(header)
 
diff -r d12c7938c4b0 Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py  Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/test/test_httplib.py  Tue Feb 17 07:55:09 2015 -0800
@@ -171,6 +171,17 @@
         conn.putheader('Content-length', 42)
         self.assertIn(b'Content-length: 42', conn._buffer)
 
+        conn.putheader('Foo', ' bar ')
+        self.assertIn(b'Foo:  bar ', conn._buffer)
+        conn.putheader('Bar', '\tbaz\t')
+        self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+        conn.putheader('Authorization', 'Bearer mytoken')
+        self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
+        conn.putheader('IterHeader', 'IterA', 'IterB')
+        self.assertIn(b'IterHeader: IterA IterB', conn._buffer)
+        conn.putheader('LatinHeader', b'\xFF')
+        self.assertIn(b'LatinHeader: \xFF', conn._buffer)
+
     def test_ipv6host_header(self):
         # Default host header on IPv6 transaction should wrapped by [] if
         # its actual IPv6 address
@@ -200,6 +211,22 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_invalid_control_characters(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket('')
+        conn.putrequest('GET', '/')
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4: header names must
+        # not contain whitespace or non-ASCII octets
+        self.assertRaises(
+            ValueError, conn.putheader, b' InvalidName ', b'foo')
+        self.assertRaises(
+            ValueError, conn.putheader, b'Invalid\x80Name', b'foo')
+        # issue22928
+        self.assertRaises(
+            ValueError, conn.putheader, b'User-agent',
+            'Mozilla/5.0' + chr(0x0A) + 'Location: header injection')
+
 
 class BasicTest(TestCase):
     def test_status_lines(self):
diff -r d12c7938c4b0 Misc/NEWS
--- a/Misc/NEWS Mon Feb 16 13:33:32 2015 +0200
+++ b/Misc/NEWS Tue Feb 17 07:55:09 2015 -0800
@@ -44,6 +44,9 @@
   argument which, if set to True, will pass messages to handlers taking handler
   levels into account.
 
+- Issue #22928: HTTPConnection.putheader now validates header names and values
+  against RFC 7230 and joins multiple values with spaces instead of folding.
+
 Build
 -----
 
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to