[issue1247] PEP 3137 patch (repr, names, parser)

Christian Heimes Mon, 08 Oct 2007 09:39:38 -0700

New submission from Christian Heimes:


The patch makes the following changes:

- change PyString's repr() to return "b'...'"
- change PyBytes's repr() to return "buffer(b'...')"
- change parser so that b"..." returns PyString, not PyBytes
- rename bytes -> buffer, str8 -> bytes

The patch breaks some unit tests because some of the infrastructure, like
the new buffer(), isn't in place yet. I'm not happy with bytes_repr(), but
the for loop with *p++ was the easiest way to implement it. Every other
implementation I could think of was either too complicated or wouldn't
work (like memcpy).

----------
components: Interpreter Core
files: pep3137.patch
messages: 56280
nosy: tiran
severity: major
status: open
title: PEP 3137 patch (repr, names, parser)
versions: Python 3.0

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1247>
__________________________________

Index: Python/compile.c
===================================================================
--- Python/compile.c	(revision 58324)
+++ Python/compile.c	(working copy)
@@ -787,8 +787,6 @@
 			return 1-oparg;
 		case BUILD_MAP:
 			return 1;
-		case MAKE_BYTES:
-			return 0;
 		case LOAD_ATTR:
 			return 0;
 		case COMPARE_OP:
@@ -3222,7 +3220,6 @@
 		break;
 	case Bytes_kind:
 		ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
-		ADDOP(c, MAKE_BYTES);
 		break;
 	case Ellipsis_kind:
 		ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
Index: Objects/bytesobject.c
===================================================================
--- Objects/bytesobject.c	(revision 58324)
+++ Objects/bytesobject.c	(working copy)
@@ -889,9 +889,12 @@
 bytes_repr(PyBytesObject *self)
 {
     static const char *hexdigits = "0123456789abcdef";
-    size_t newsize = 3 + 4 * Py_Size(self);
+    static const char *quote_prefix = "buffer(b'";
+    static const char *quote_postfix = "')";
+    /* 9 prefix + 2 postfix */
+    size_t newsize = 11 + 4 * Py_Size(self);
     PyObject *v;
-    if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
+    if (newsize > PY_SSIZE_T_MAX || (newsize-11) / 4 != Py_Size(self)) {
         PyErr_SetString(PyExc_OverflowError,
             "bytes object is too large to make repr");
         return NULL;
@@ -904,17 +907,17 @@
         register Py_ssize_t i;
         register Py_UNICODE c;
         register Py_UNICODE *p;
-        int quote = '\'';
 
         p = PyUnicode_AS_UNICODE(v);
-        *p++ = 'b';
-        *p++ = quote;
+        for (i=0; i<strlen(quote_prefix); i++) {
+            *p++ = quote_prefix[i];
+        }
         for (i = 0; i < Py_Size(self); i++) {
             /* There's at least enough room for a hex escape
                and a closing quote. */
             assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
             c = self->ob_bytes[i];
-            if (c == quote || c == '\\')
+            if (c == '\'' || c == '\\')
                 *p++ = '\\', *p++ = c;
             else if (c == '\t')
                 *p++ = '\\', *p++ = 't';
@@ -934,7 +937,9 @@
                 *p++ = c;
         }
         assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
-        *p++ = quote;
+        for (i=0; i<strlen(quote_postfix); i++) {
+           *p++ = quote_postfix[i];
+        }
         *p = '\0';
         if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
             Py_DECREF(v);
@@ -2986,7 +2991,7 @@
 
 PyTypeObject PyBytes_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "bytes",
+    "buffer",
     sizeof(PyBytesObject),
     0,
     (destructor)bytes_dealloc,          /* tp_dealloc */
Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c	(revision 58324)
+++ Objects/stringobject.c	(working copy)
@@ -772,7 +772,7 @@
 	Py_ssize_t length = PyString_GET_SIZE(op);
 	size_t newsize = 3 + 4 * Py_Size(op);
 	PyObject *v;
-	if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
+	if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != Py_Size(op)) {
 		PyErr_SetString(PyExc_OverflowError,
 			"string is too large to make repr");
 	}
@@ -803,7 +803,7 @@
 			;
 		}
 
-		*p++ = 's', *p++ = quote;
+		*p++ = 'b', *p++ = quote;
 		for (i = 0; i < Py_Size(op); i++) {
 			/* There's at least enough room for a hex escape
 			   and a closing quote. */
@@ -875,7 +875,7 @@
                 if (PyBytes_Check(bb))
 			return PyBytes_Concat((PyObject *)a, bb);
 		PyErr_Format(PyExc_TypeError,
-			     "cannot concatenate 'str8' and '%.200s' objects",
+			     "cannot concatenate 'bytes' and '%.200s' objects",
 			     Py_Type(bb)->tp_name);
 		return NULL;
 	}
@@ -3091,7 +3091,7 @@
         goto onError;
     if (!PyBytes_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "[str8] encoder did not return a bytes object "
+                     "[bytes] encoder did not return a bytes object "
                      "(type=%.400s)",
                      Py_Type(v)->tp_name);
         Py_DECREF(v);
@@ -3865,7 +3865,7 @@
 
 PyTypeObject PyString_Type = {
 	PyVarObject_HEAD_INIT(&PyType_Type, 0)
-	"str8",
+	"bytes",
 	sizeof(PyStringObject),
 	sizeof(char),
  	string_dealloc, 			/* tp_dealloc */
Index: Lib/pickletools.py
===================================================================
--- Lib/pickletools.py	(revision 58324)
+++ Lib/pickletools.py	(working copy)
@@ -272,7 +272,7 @@
     >>> read_stringnl(io.BytesIO(b"\n"))
     Traceback (most recent call last):
     ...
-    ValueError: no string quotes around b''
+    ValueError: no string quotes around buffer(b'')
 
     >>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
     ''
@@ -493,7 +493,7 @@
     >>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
     Traceback (most recent call last):
     ...
-    ValueError: trailing 'L' not allowed in b'1234L'
+    ValueError: trailing 'L' not allowed in buffer(b'1234L')
     """
 
     s = read_stringnl(f, decode=False, stripquotes=False)
Index: Lib/test/test_unicode.py
===================================================================
--- Lib/test/test_unicode.py	(revision 58324)
+++ Lib/test/test_unicode.py	(working copy)
@@ -600,7 +600,7 @@
         self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000,  3.57')
         self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
         if not sys.platform.startswith('java'):
-            self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
+            self.assertEqual("%r, %r" % (b"abc", "abc"), "buffer(b'abc'), 'abc'")
         self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
         self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
 
Index: Lib/test/test_codeccallbacks.py
===================================================================
--- Lib/test/test_codeccallbacks.py	(revision 58324)
+++ Lib/test/test_codeccallbacks.py	(working copy)
@@ -140,17 +140,17 @@
             sin += chr(sys.maxunicode)
         sout = b"a\\xac\\u1234\\u20ac\\u8000"
         if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode)
+            sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
 
         sout = b"a\xac\\u1234\\u20ac\\u8000"
         if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode)
+            sout += bytes("\\U%08x" % sys.maxunicode, "latin-1")
         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
 
         sout = b"a\xac\\u1234\xa4\\u8000"
         if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode)
+            sout += bytes("\\U%08x" % sys.maxunicode, "iso-8859-15")
         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 
     def test_decoderelaxedutf8(self):
Index: Lib/test/test_bytes.py
===================================================================
--- Lib/test/test_bytes.py	(revision 58324)
+++ Lib/test/test_bytes.py	(working copy)
@@ -71,12 +71,12 @@
         self.assertRaises(ValueError, bytes, [10**100])
 
     def test_repr(self):
-        self.assertEqual(repr(bytes()), "b''")
-        self.assertEqual(repr(bytes([0])), "b'\\x00'")
+        self.assertEqual(repr(bytes()), "buffer(b'')")
+        self.assertEqual(repr(bytes([0])), "buffer(b'\\x00')")
         self.assertEqual(repr(bytes([0, 1, 254, 255])),
-                         "b'\\x00\\x01\\xfe\\xff'")
-        self.assertEqual(repr(b"abc"), "b'abc'")
-        self.assertEqual(repr(b"'"), "b'\\''")
+                         "buffer(b'\\x00\\x01\\xfe\\xff')")
+        self.assertEqual(repr(b"abc"), "buffer(b'abc')")
+        self.assertEqual(repr(b"'"), "buffer(b'\\'')")
 
     def test_compare(self):
         b1 = bytes([1, 2, 3])
Index: Lib/pickle.py
===================================================================
--- Lib/pickle.py	(revision 58324)
+++ Lib/pickle.py	(working copy)
@@ -1212,19 +1212,19 @@
     byte in the LONG1 pickling context.
 
     >>> encode_long(0)
-    b''
+    buffer(b'')
     >>> encode_long(255)
-    b'\xff\x00'
+    buffer(b'\xff\x00')
     >>> encode_long(32767)
-    b'\xff\x7f'
+    buffer(b'\xff\x7f')
     >>> encode_long(-256)
-    b'\x00\xff'
+    buffer(b'\x00\xff')
     >>> encode_long(-32768)
-    b'\x00\x80'
+    buffer(b'\x00\x80')
     >>> encode_long(-128)
-    b'\x80'
+    buffer(b'\x80')
     >>> encode_long(127)
-    b'\x7f'
+    buffer(b'\x7f')
     >>>
     """

_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue1247] PEP 3137 patch (repr, names, parser)

Reply via email to