Demian Brecht added the comment:

Thanks for the review, Martin. I've addressed your comments.

> The length of an encoded Latin-1 string should equal the length of the 
> unencoded text string, since it is a one-to-one character-to-byte encoding.
Once in a while, I want to stop what I'm doing, put my head in my hands,
and think to myself, "How did that escape me?!" Of course you're right,
and thanks for the catch. I've reverted the handling to how it was being
done in the previous patch.

> Though I’m not particularly excited by silently Latin-1 encoding text bodies 
> in the first place.
Truth be told, I'm more fond of only accepting pre-encoded byte strings
as input. However, that backwards-incompatible change would likely break
many things. Request bodies can currently be strings, byte strings,
iterables or file objects. In the cases of string and file objects,
encoding is already supported. The change I made makes handling
iterables consistent with the other accepted data types.

I'm not sure why, but the auto-encoding of a raw string body was being
done higher up in the call stack for the general use case, in
_send_request() (Lib/http/client.py:1064). I've moved this handling to
send() for consistency with the auto-encoding of other input types. This
also ensures consistent behavior between calling request() with a string
body and calling send() directly.

----------
Added file: http://bugs.python.org/file38130/list_content_length_3.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue23350>
_______________________________________
diff -r e548ab4ce71d Doc/library/http.client.rst
--- a/Doc/library/http.client.rst       Mon Feb 09 19:49:00 2015 +0000
+++ b/Doc/library/http.client.rst       Fri Feb 13 07:45:43 2015 -0800
@@ -212,8 +212,10 @@
    contents of the file is sent; this file object should support ``fileno()``
    and ``read()`` methods. The header Content-Length is automatically set to
    the length of the file as reported by stat. The *body* argument may also be
-   an iterable and Content-Length header should be explicitly provided when the
-   body is an iterable.
+   an iterable. If the iterable is a tuple or list, the Content-Length will
+   automatically be set if not already supplied in the request headers.
+   In all other iterable cases, the Content-Length header should be explicitly
+   provided.
 
    The *headers* argument should be a mapping of extra HTTP
    headers to send with the request.
@@ -221,6 +223,10 @@
    .. versionadded:: 3.2
       *body* can now be an iterable.
 
+   .. versionadded:: 3.5
+      The Content-Length header will be set when *body* is a list or tuple.
+
+
 .. method:: HTTPConnection.getresponse()
 
    Should be called after a request is sent to get the response from the server.
diff -r e548ab4ce71d Lib/http/client.py
--- a/Lib/http/client.py        Mon Feb 09 19:49:00 2015 +0000
+++ b/Lib/http/client.py        Fri Feb 13 07:45:43 2015 -0800
@@ -836,11 +836,19 @@
                     datablock = datablock.encode("iso-8859-1")
                 self.sock.sendall(datablock)
             return
+
+        if isinstance(data, str):
+            # RFC 2616 Section 3.7.1 says that text default has a
+            # default charset of iso-8859-1.
+            data = data.encode('iso-8859-1')
+
         try:
             self.sock.sendall(data)
         except TypeError:
             if isinstance(data, collections.Iterable):
                 for d in data:
+                    if hasattr(d, 'encode'):
+                        d = d.encode('iso-8859-1')
                     self.sock.sendall(d)
             else:
                 raise TypeError("data should be a bytes-like object "
@@ -1031,20 +1039,25 @@
 
     def _set_content_length(self, body):
         # Set the content-length based on the body.
-        thelen = None
-        try:
-            thelen = str(len(body))
-        except TypeError as te:
-            # If this is a file-like object, try to
-            # fstat its file descriptor
+        size = None
+        if isinstance(body, (list, tuple)):
+            # the body will either be already encoded or will be latin-1
+            # encoded when being sent. as latin-1 and ascii strings are of
+            # equal size, there isn't a need to make a distinction here.
+            size = sum(len(line) for line in body)
+        else:
             try:
-                thelen = str(os.fstat(body.fileno()).st_size)
-            except (AttributeError, OSError):
-                # Don't send a length if this failed
-                if self.debuglevel > 0: print("Cannot stat!!")
+                size = len(body)
+            except TypeError:
+                try:
+                    size = os.fstat(body.fileno()).st_size
+                except (AttributeError, OSError):
+                    if self.debuglevel > 0:
+                        print("Cannot stat!!")
+                    size = None
 
-        if thelen is not None:
-            self.putheader('Content-Length', thelen)
+        if size is not None:
+            self.putheader('Content-Length', size)
 
     def _send_request(self, method, url, body, headers):
         # Honor explicitly requested Host: and Accept-Encoding: headers.
@@ -1061,10 +1074,6 @@
             self._set_content_length(body)
         for hdr, value in headers.items():
             self.putheader(hdr, value)
-        if isinstance(body, str):
-            # RFC 2616 Section 3.7.1 says that text default has a
-            # default charset of iso-8859-1.
-            body = body.encode('iso-8859-1')
         self.endheaders(body)
 
     def getresponse(self):
diff -r e548ab4ce71d Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py  Mon Feb 09 19:49:00 2015 +0000
+++ b/Lib/test/test_httplib.py  Fri Feb 13 07:45:43 2015 -0800
@@ -1161,7 +1161,6 @@
     def setUp(self):
         self.conn = client.HTTPConnection('example.com')
         self.conn.sock = self.sock = FakeSocket("")
-        self.conn.sock = self.sock
 
     def get_headers_and_fp(self):
         f = io.BytesIO(self.sock.data)
@@ -1226,6 +1225,25 @@
             self.assertEqual("5", message.get("content-length"))
             self.assertEqual(b'body\xc1', f.read())
 
+    def test_list_body(self):
+        cases = (
+            ([b'foo', b'bar'], b'foobar'),
+            ((b'foo', b'bar'), b'foobar'),
+            ((b'foo', 'bar'), b'foobar'),
+            ([b'foo', 'bar'], b'foobar'),
+        )
+        for body, expected in cases:
+            with self.subTest(body):
+                self.conn = client.HTTPConnection('example.com')
+                self.conn.sock = self.sock = FakeSocket('')
+
+                self.conn.request('PUT', '/url', body)
+                msg, f = self.get_headers_and_fp()
+                self.assertNotIn('Content-Type', msg)
+                self.assertIsNone(msg.get_charset())
+                self.assertEqual(len(expected), int(msg.get('content-length')))
+                self.assertEqual(expected, f.read())
+
 
 class HTTPResponseTest(TestCase):
 
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to