Demian Brecht added the comment:

> Maybe join them with tabs rather than spaces then, since it was previously 
> "\r\n\t". This way it is even closer to before.

After thinking about this a little more, I think I'd prefer to keep
spaces rather than tabs. The reason is that, in my mind, now that
continuations have been made obsolete, it's more natural to do something
like this:

putheader('Authorization', 'Bearer', 'my_token')

than

putheader('Authorization', 'Bearer my_token')

I realize it's a semantic change from previous behavior, but it seems to
me to be preferable given the latest RFCs. I'd think that at some point
in the future, we'd want to remove \x20 from the valid header value
range to entirely conform to the spec. This is the first step in
allowing for graceful deprecation.

----------
Added file: http://bugs.python.org/file38154/issue22928_1.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue22928>
_______________________________________
diff -r d12c7938c4b0 Doc/library/http.client.rst
--- a/Doc/library/http.client.rst       Mon Feb 16 13:33:32 2015 +0200
+++ b/Doc/library/http.client.rst       Mon Feb 16 09:42:20 2015 -0800
@@ -292,10 +292,10 @@
 
 .. method:: HTTPConnection.putheader(header, argument[, ...])
 
-   Send an :rfc:`822`\ -style header to the server.  It sends a line to the server
-   consisting of the header, a colon and a space, and the first argument.  If more
-   arguments are given, continuation lines are sent, each consisting of a tab and
-   an argument.
+   Send an :rfc:`7230`\ -style header to the server.  It sends a line to the server
+   consisting of the header, a colon and a space, and the first argument. If
+   more arguments are given, they are appended to the header value, each
+   prepended with a single space.
 
 
 .. method:: HTTPConnection.endheaders(message_body=None)
diff -r d12c7938c4b0 Lib/http/client.py
--- a/Lib/http/client.py        Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/http/client.py        Mon Feb 16 09:42:20 2015 -0800
@@ -71,6 +71,7 @@
 import http
 import io
 import os
+import re
 import socket
 import collections
 from urllib.parse import urlsplit
@@ -87,6 +88,7 @@
 
 _UNKNOWN = 'UNKNOWN'
 
+
 # connection states
 _CS_IDLE = 'Idle'
 _CS_REQ_STARTED = 'Request-started'
@@ -107,6 +109,35 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR          = %x21-7E
+# obs-text       = %x80-FF
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+#
+# obs-fold       = CRLF 1*( SP / HTAB )
+#                ; obsolete line folding
+#                ; see Section 3.2.4
+
+# token          = 1*tchar
+#
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+_HEADER_LEGAL_NAME = re.compile(b'^[!#$%&\'*+-.^_`|~a-zA-z0-9]+$')
+# 0x20 (sp) is included in the valid character range for backwards
+# compatibility, where header values with spaces (i.e. auth headers) are passed
+# through to putheader as single values
+_HEADER_LEGAL_VALUE = re.compile(b'^\s*[\x20-\x7E]*\s*$')
+
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -1000,13 +1031,32 @@
 
         if hasattr(header, 'encode'):
             header = header.encode('ascii')
+
+        if not _HEADER_LEGAL_NAME.match(header):
+            raise ValueError('Invalid header name {}'.format(header))
+
         values = list(values)
         for i, one_value in enumerate(values):
             if hasattr(one_value, 'encode'):
-                values[i] = one_value.encode('latin-1')
+                encoded_value = one_value.encode('latin-1')
             elif isinstance(one_value, int):
-                values[i] = str(one_value).encode('ascii')
-        value = b'\r\n\t'.join(values)
+                encoded_value = str(one_value).encode('ascii')
+            else:
+                encoded_value = one_value
+
+            if not _HEADER_LEGAL_VALUE.match(encoded_value):
+                raise ValueError(
+                    'Invalid header value {}'.format(encoded_value))
+
+            values[i] = encoded_value
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4 states that line
+        # folding is obsolete, unless message/http MIME type is used and rules
+        # are conformed to. otherwise, spaces should be used. it might be a
+        # good idea to put validation for this rule in sometime in the future.
+        # as it currently stands, there's no way to determine the MIME type of
+        # the message at this point.
+        value = b' '.join(values)
         header = header + b': ' + value
         self._output(header)
 
diff -r d12c7938c4b0 Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py  Mon Feb 16 13:33:32 2015 +0200
+++ b/Lib/test/test_httplib.py  Mon Feb 16 09:42:20 2015 -0800
@@ -171,6 +171,15 @@
         conn.putheader('Content-length', 42)
         self.assertIn(b'Content-length: 42', conn._buffer)
 
+        conn.putheader('Foo', ' bar ')
+        self.assertIn(b'Foo:  bar ', conn._buffer)
+        conn.putheader('Bar', '\tbaz\t')
+        self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+        conn.putheader('Authorization', 'Bearer mytoken')
+        self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
+        conn.putheader('IterHeader', 'IterA', 'IterB')
+        self.assertIn(b'IterHeader: IterA IterB', conn._buffer)
+
     def test_ipv6host_header(self):
         # Default host header on IPv6 transaction should wrapped by [] if
         # its actual IPv6 address
@@ -200,6 +209,22 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_invalid_control_characters(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket('')
+        conn.putrequest('GET', '/')
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no
+        # longer allowed in header names
+        self.assertRaises(
+            ValueError, conn.putheader, b' InvalidName ', b'foo')
+        self.assertRaises(
+            ValueError, conn.putheader, b'Invalid\x80Name', b'foo')
+        # issue22928
+        self.assertRaises(
+            ValueError, conn.putheader, b'User-agent',
+            'Mozilla/5.0' + chr(0x0A) + 'Location: header injection')
+
 
 class BasicTest(TestCase):
     def test_status_lines(self):
diff -r d12c7938c4b0 Misc/NEWS
--- a/Misc/NEWS Mon Feb 16 13:33:32 2015 +0200
+++ b/Misc/NEWS Mon Feb 16 09:42:20 2015 -0800
@@ -44,6 +44,9 @@
   argument which, if set to True, will pass messages to handlers taking handler
   levels into account.
 
+- Issue #22928: HTTPConnection.putheader has been updated to conform to RFC
+  7230.
+
 Build
 -----
 
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to