New submission from Alexandre Vassalotti:
Here is a preliminary patch to make PyString return integers on indexing
and iteration. There are still quite a few XXX comments in the patch that
I would like to fix. However, the good news is that all tests pass.
----------
components: Interpreter Core
files: string_iter_ret_ints.patch
messages: 56442
nosy: alexandre.vassalotti
severity: normal
status: open
title: PEP 3137: Make PyString's indexing and iteration return integers
versions: Python 3.0
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1280>
__________________________________
Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c (revision 58458)
+++ Objects/stringobject.c (working copy)
@@ -1233,23 +1233,13 @@
static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i)
{
- char pchar;
- PyObject *v;
+ if (i < 0)
+ i += Py_Size(a);
if (i < 0 || i >= Py_Size(a)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
}
- pchar = a->ob_sval[i];
- v = (PyObject *)characters[pchar & UCHAR_MAX];
- if (v == NULL)
- v = PyString_FromStringAndSize(&pchar, 1);
- else {
-#ifdef COUNT_ALLOCS
- one_strings++;
-#endif
- Py_INCREF(v);
- }
- return v;
+ return PyInt_FromLong((unsigned char)a->ob_sval[i]);
}
static PyObject*
@@ -5150,8 +5140,8 @@
assert(PyString_Check(seq));
if (it->it_index < PyString_GET_SIZE(seq)) {
- item = PyString_FromStringAndSize(
- PyString_AS_STRING(seq)+it->it_index, 1);
+ item = PyInt_FromLong(
+ (unsigned char)seq->ob_sval[it->it_index]);
if (item != NULL)
++it->it_index;
return item;
Index: Lib/modulefinder.py
===================================================================
--- Lib/modulefinder.py (revision 58458)
+++ Lib/modulefinder.py (working copy)
@@ -367,7 +367,7 @@
consts = co.co_consts
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
while code:
- c = code[0]
+ c = chr(code[0])
if c in STORE_OPS:
oparg, = unpack('<H', code[1:3])
yield "store", (names[oparg],)
Index: Lib/encodings/__init__.py
===================================================================
--- Lib/encodings/__init__.py (revision 58458)
+++ Lib/encodings/__init__.py (working copy)
@@ -54,7 +54,10 @@
"""
chars = []
punct = False
- for c in encoding:
+ # 'encoding' is a bytes (PyString) object, thus its iterator returns
+ # integers. So, convert 'encoding' to Unicode before iterating over it.
+ # XXX Should _PyCodec_Lookup() pass a Unicode object, instead?
+ for c in str(encoding):
if c.isalnum() or c == '.':
if punct and chars:
chars.append('_')
Index: Lib/test/test_set.py
===================================================================
--- Lib/test/test_set.py (revision 58458)
+++ Lib/test/test_set.py (working copy)
@@ -72,7 +72,7 @@
self.assertEqual(type(u), self.thetype)
self.assertRaises(PassThru, self.s.union, check_pass_thru())
self.assertRaises(TypeError, self.s.union, [[]])
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
self.assertEqual(self.thetype('abcba').union(C('cdc')), set('abcd'))
self.assertEqual(self.thetype('abcba').union(C('efgfe')), set('abcefg'))
self.assertEqual(self.thetype('abcba').union(C('ccb')), set('abc'))
@@ -96,7 +96,7 @@
self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.intersection, check_pass_thru())
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
self.assertEqual(self.thetype('abcba').intersection(C('cdc')), set('cc'))
self.assertEqual(self.thetype('abcba').intersection(C('efgfe')), set(''))
self.assertEqual(self.thetype('abcba').intersection(C('ccb')), set('bc'))
@@ -121,7 +121,7 @@
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.difference, check_pass_thru())
self.assertRaises(TypeError, self.s.difference, [[]])
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
self.assertEqual(self.thetype('abcba').difference(C('cdc')), set('ab'))
self.assertEqual(self.thetype('abcba').difference(C('efgfe')), set('abc'))
self.assertEqual(self.thetype('abcba').difference(C('ccb')), set('a'))
@@ -146,7 +146,7 @@
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru())
self.assertRaises(TypeError, self.s.symmetric_difference, [[]])
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
self.assertEqual(self.thetype('abcba').symmetric_difference(C('cdc')), set('abd'))
self.assertEqual(self.thetype('abcba').symmetric_difference(C('efgfe')), set('abcefg'))
self.assertEqual(self.thetype('abcba').symmetric_difference(C('ccb')), set('a'))
@@ -390,7 +390,7 @@
self.assertRaises(PassThru, self.s.update, check_pass_thru())
self.assertRaises(TypeError, self.s.update, [[]])
for p, q in (('cdc', 'abcd'), ('efgfe', 'abcefg'), ('ccb', 'abc'), ('ef', 'abcef')):
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
s = self.thetype('abcba')
self.assertEqual(s.update(C(p)), None)
self.assertEqual(s, set(q))
@@ -411,7 +411,7 @@
self.assertRaises(PassThru, self.s.intersection_update, check_pass_thru())
self.assertRaises(TypeError, self.s.intersection_update, [[]])
for p, q in (('cdc', 'c'), ('efgfe', ''), ('ccb', 'bc'), ('ef', '')):
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
s = self.thetype('abcba')
self.assertEqual(s.intersection_update(C(p)), None)
self.assertEqual(s, set(q))
@@ -436,7 +436,7 @@
self.assertRaises(TypeError, self.s.difference_update, [[]])
self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]])
for p, q in (('cdc', 'ab'), ('efgfe', 'abc'), ('ccb', 'a'), ('ef', 'abc')):
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
s = self.thetype('abcba')
self.assertEqual(s.difference_update(C(p)), None)
self.assertEqual(s, set(q))
@@ -460,7 +460,7 @@
self.assertRaises(PassThru, self.s.symmetric_difference_update, check_pass_thru())
self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]])
for p, q in (('cdc', 'abd'), ('efgfe', 'abcefg'), ('ccb', 'a'), ('ef', 'abcef')):
- for C in set, frozenset, dict.fromkeys, str, str8, list, tuple:
+ for C in set, frozenset, dict.fromkeys, str, list, tuple:
s = self.thetype('abcba')
self.assertEqual(s.symmetric_difference_update(C(p)), None)
self.assertEqual(s, set(q))
Index: Lib/test/test_struct.py
===================================================================
--- Lib/test/test_struct.py (revision 58458)
+++ Lib/test/test_struct.py (working copy)
@@ -674,8 +674,11 @@
elif not prefix and verbose:
print('size of bool in native format is %i' % (len(packed)))
- for c in str8('\x01\x7f\xff\x0f\xf0'):
- if struct.unpack('>t', c)[0] is not True:
+ for c in b'\x01\x7f\xff\x0f\xf0':
+ # XXX str8 constructor uses UTF-8 by default. So, converting an
+ # XXX int to a str8 of length-1 requires this odd maneuver.
+ x = str8(bytes(chr(255), 'latin-1'))
+ if struct.unpack('>t', x)[0] is not True:
raise TestFailed('%c did not unpack as True' % c)
test_bool()
Index: Lib/test/string_tests.py
===================================================================
--- Lib/test/string_tests.py (revision 58458)
+++ Lib/test/string_tests.py (working copy)
@@ -558,6 +558,10 @@
a = self.type2test('DNSSEC')
b = self.type2test('')
for c in a:
+ # Special case for the str8, since indexing returns an integer
+ # XXX Maybe it would be a good idea to separate str8's tests...
+ if self.type2test == str8:
+ c = chr(c)
b += c
hash(b)
self.assertEqual(hash(a), hash(b))
Index: Lib/test/test_bytes.py
===================================================================
--- Lib/test/test_bytes.py (revision 58458)
+++ Lib/test/test_bytes.py (working copy)
@@ -347,7 +347,7 @@
sample = str8("Hello world\n\x80\x81\xfe\xff")
buf = memoryview(sample)
b = bytes(buf)
- self.assertEqual(b, bytes(map(ord, sample)))
+ self.assertEqual(b, bytes(sample))
def test_to_str(self):
sample = "Hello world\n\x80\x81\xfe\xff"
Index: Lib/dis.py
===================================================================
--- Lib/dis.py (revision 58458)
+++ Lib/dis.py (working copy)
@@ -117,8 +117,7 @@
extended_arg = 0
free = None
while i < n:
- c = code[i]
- op = ord(c)
+ op = code[i]
if i in linestarts:
if i > 0:
print()
@@ -134,7 +133,7 @@
print(opname[op].ljust(20), end=' ')
i = i+1
if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
+ oparg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
@@ -162,8 +161,7 @@
n = len(code)
i = 0
while i < n:
- c = code[i]
- op = ord(c)
+ op = code[i]
if i == lasti: print('-->', end=' ')
else: print(' ', end=' ')
if i in labels: print('>>', end=' ')
@@ -172,7 +170,7 @@
print(opname[op].ljust(15), end=' ')
i = i+1
if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
+ oparg = code[i] + code[i+1]*256
i = i+2
print(repr(oparg).rjust(5), end=' ')
if op in hasconst:
@@ -208,11 +206,10 @@
n = len(code)
i = 0
while i < n:
- c = code[i]
- op = ord(c)
+ op = code[i]
i = i+1
if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
+ oparg = code[i] + code[i+1]*256
i = i+2
label = -1
if op in hasjrel:
@@ -230,8 +227,8 @@
Generate pairs (offset, lineno) as described in Python/compile.c.
"""
- byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
- line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+ byte_increments = list(code.co_lnotab[0::2])
+ line_increments = list(code.co_lnotab[1::2])
lastlineno = None
lineno = code.co_firstlineno
Index: Lib/sre_parse.py
===================================================================
--- Lib/sre_parse.py (revision 58458)
+++ Lib/sre_parse.py (working copy)
@@ -181,20 +181,27 @@
return self.width
class Tokenizer:
- def __init__(self, string):
+ def __init__(self, string):
self.string = string
self.index = 0
self.__next()
+
def __next(self):
if self.index >= len(self.string):
self.next = None
return
char = self.string[self.index]
- if char[0] == "\\":
+ # Special case for the str8, since indexing returns an integer
+ # XXX This is only needed for test_bug_926075 in test_re.py
+ if isinstance(self.string, str8):
+ char = chr(char)
+ if char == "\\":
try:
c = self.string[self.index + 1]
except IndexError:
raise error("bogus escape (end of line)")
+ if isinstance(self.string, str8):
+ char = chr(c)
char = char + c
self.index = self.index + len(char)
self.next = char
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com