New submission from Christian Heimes:
The patch makes the following changes:
- change PyString's repr() to return "b'...'"
- change PyBytes's repr() to return "buffer(b'...')"
- change parser so that b"..." returns PyString, not PyBytes
- rename bytes -> buffer, str8 -> bytes
The patch breaks some unit tests because some of the infrastructure, like
the new buffer(), isn't in place yet. I'm not happy with bytes_repr(), but
the for loop with *p++ was the easiest way to implement it. Every other
implementation I could think of was either too complicated or wouldn't
work (like memcpy).
----------
components: Interpreter Core
files: pep3137.patch
messages: 56280
nosy: tiran
severity: major
status: open
title: PEP 3137 patch (repr, names, parser)
versions: Python 3.0
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1247>
__________________________________
Index: Python/compile.c
===================================================================
--- Python/compile.c (revision 58324)
+++ Python/compile.c (working copy)
@@ -787,8 +787,6 @@
return 1-oparg;
case BUILD_MAP:
return 1;
- case MAKE_BYTES:
- return 0;
case LOAD_ATTR:
return 0;
case COMPARE_OP:
@@ -3222,7 +3220,6 @@
break;
case Bytes_kind:
ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
- ADDOP(c, MAKE_BYTES);
break;
case Ellipsis_kind:
ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
Index: Objects/bytesobject.c
===================================================================
--- Objects/bytesobject.c (revision 58324)
+++ Objects/bytesobject.c (working copy)
@@ -889,9 +889,12 @@
bytes_repr(PyBytesObject *self)
{
static const char *hexdigits = "0123456789abcdef";
- size_t newsize = 3 + 4 * Py_Size(self);
+ static const char *quote_prefix = "buffer(b'";
+ static const char *quote_postfix = "')";
+ /* 9 prefix + 2 postfix */
+ size_t newsize = 11 + 4 * Py_Size(self);
PyObject *v;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
+ if (newsize > PY_SSIZE_T_MAX || (newsize-11) / 4 != Py_Size(self)) {
PyErr_SetString(PyExc_OverflowError,
"bytes object is too large to make repr");
return NULL;
@@ -904,17 +907,17 @@
register Py_ssize_t i;
register Py_UNICODE c;
register Py_UNICODE *p;
- int quote = '\'';
p = PyUnicode_AS_UNICODE(v);
- *p++ = 'b';
- *p++ = quote;
+ for (i=0; i<strlen(quote_prefix); i++) {
+ *p++ = quote_prefix[i];
+ }
for (i = 0; i < Py_Size(self); i++) {
/* There's at least enough room for a hex escape
and a closing quote. */
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
c = self->ob_bytes[i];
- if (c == quote || c == '\\')
+ if (c == '\'' || c == '\\')
*p++ = '\\', *p++ = c;
else if (c == '\t')
*p++ = '\\', *p++ = 't';
@@ -934,7 +937,9 @@
*p++ = c;
}
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
- *p++ = quote;
+ for (i=0; i<strlen(quote_postfix); i++) {
+ *p++ = quote_postfix[i];
+ }
*p = '\0';
if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
Py_DECREF(v);
@@ -2986,7 +2991,7 @@
PyTypeObject PyBytes_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "bytes",
+ "buffer",
sizeof(PyBytesObject),
0,
(destructor)bytes_dealloc, /* tp_dealloc */
Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c (revision 58324)
+++ Objects/stringobject.c (working copy)
@@ -772,7 +772,7 @@
Py_ssize_t length = PyString_GET_SIZE(op);
size_t newsize = 3 + 4 * Py_Size(op);
PyObject *v;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
+ if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != Py_Size(op)) {
PyErr_SetString(PyExc_OverflowError,
"string is too large to make repr");
}
@@ -803,7 +803,7 @@
;
}
- *p++ = 's', *p++ = quote;
+ *p++ = 'b', *p++ = quote;
for (i = 0; i < Py_Size(op); i++) {
/* There's at least enough room for a hex escape
and a closing quote. */
@@ -875,7 +875,7 @@
if (PyBytes_Check(bb))
return PyBytes_Concat((PyObject *)a, bb);
PyErr_Format(PyExc_TypeError,
- "cannot concatenate 'str8' and '%.200s' objects",
+ "cannot concatenate 'bytes' and '%.200s' objects",
Py_Type(bb)->tp_name);
return NULL;
}
@@ -3091,7 +3091,7 @@
goto onError;
if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "[str8] encoder did not return a bytes object "
+ "[bytes] encoder did not return a bytes object "
"(type=%.400s)",
Py_Type(v)->tp_name);
Py_DECREF(v);
@@ -3865,7 +3865,7 @@
PyTypeObject PyString_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str8",
+ "bytes",
sizeof(PyStringObject),
sizeof(char),
string_dealloc, /* tp_dealloc */
Index: Lib/pickletools.py
===================================================================
--- Lib/pickletools.py (revision 58324)
+++ Lib/pickletools.py (working copy)
@@ -272,7 +272,7 @@
>>> read_stringnl(io.BytesIO(b"\n"))
Traceback (most recent call last):
...
- ValueError: no string quotes around b''
+ ValueError: no string quotes around buffer(b'')
>>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
''
@@ -493,7 +493,7 @@
>>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
Traceback (most recent call last):
...
- ValueError: trailing 'L' not allowed in b'1234L'
+ ValueError: trailing 'L' not allowed in buffer(b'1234L')
"""
s = read_stringnl(f, decode=False, stripquotes=False)
Index: Lib/test/test_unicode.py
===================================================================
--- Lib/test/test_unicode.py (revision 58324)
+++ Lib/test/test_unicode.py (working copy)
@@ -600,7 +600,7 @@
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000, 3.57')
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
if not sys.platform.startswith('java'):
- self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
+ self.assertEqual("%r, %r" % (b"abc", "abc"), "buffer(b'abc'), 'abc'")
self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
Index: Lib/test/test_codeccallbacks.py
===================================================================
--- Lib/test/test_codeccallbacks.py (revision 58324)
+++ Lib/test/test_codeccallbacks.py (working copy)
@@ -140,17 +140,17 @@
sin += chr(sys.maxunicode)
sout = b"a\\xac\\u1234\\u20ac\\u8000"
if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode)
+ sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
sout = b"a\xac\\u1234\\u20ac\\u8000"
if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode)
+ sout += bytes("\\U%08x" % sys.maxunicode, "latin-1")
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
sout = b"a\xac\\u1234\xa4\\u8000"
if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode)
+ sout += bytes("\\U%08x" % sys.maxunicode, "iso-8859-15")
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
def test_decoderelaxedutf8(self):
Index: Lib/test/test_bytes.py
===================================================================
--- Lib/test/test_bytes.py (revision 58324)
+++ Lib/test/test_bytes.py (working copy)
@@ -71,12 +71,12 @@
self.assertRaises(ValueError, bytes, [10**100])
def test_repr(self):
- self.assertEqual(repr(bytes()), "b''")
- self.assertEqual(repr(bytes([0])), "b'\\x00'")
+ self.assertEqual(repr(bytes()), "buffer(b'')")
+ self.assertEqual(repr(bytes([0])), "buffer(b'\\x00')")
self.assertEqual(repr(bytes([0, 1, 254, 255])),
- "b'\\x00\\x01\\xfe\\xff'")
- self.assertEqual(repr(b"abc"), "b'abc'")
- self.assertEqual(repr(b"'"), "b'\\''")
+ "buffer(b'\\x00\\x01\\xfe\\xff')")
+ self.assertEqual(repr(b"abc"), "buffer(b'abc')")
+ self.assertEqual(repr(b"'"), "buffer(b'\\'')")
def test_compare(self):
b1 = bytes([1, 2, 3])
Index: Lib/pickle.py
===================================================================
--- Lib/pickle.py (revision 58324)
+++ Lib/pickle.py (working copy)
@@ -1212,19 +1212,19 @@
byte in the LONG1 pickling context.
>>> encode_long(0)
- b''
+ buffer(b'')
>>> encode_long(255)
- b'\xff\x00'
+ buffer(b'\xff\x00')
>>> encode_long(32767)
- b'\xff\x7f'
+ buffer(b'\xff\x7f')
>>> encode_long(-256)
- b'\x00\xff'
+ buffer(b'\x00\xff')
>>> encode_long(-32768)
- b'\x00\x80'
+ buffer(b'\x00\x80')
>>> encode_long(-128)
- b'\x80'
+ buffer(b'\x80')
>>> encode_long(127)
- b'\x7f'
+ buffer(b'\x7f')
>>>
"""
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com