Christian Heimes added the comment:
I've created a patch that adds optimization for PyUnicode while keeping
the existing optimization for PyString. The patch moves the optimization
trick for PyObject_Hash() into a macro and adds an optimized
_PyUnicode_Eq() to unicodeobject.c
----------
assignee: -> tiran
nosy: +tiran
priority: -> normal
Added file: http://bugs.python.org/file8896/py3k_optimize_set_unicode.patch
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1564>
__________________________________
Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h (Revision 59424)
+++ Include/unicodeobject.h (Arbeitskopie)
@@ -1330,6 +1330,11 @@
const char *right
);
+PyAPI_FUNC(int) _PyUnicode_Eq(
+ PyObject *left, /* Left string */
+ PyObject *right /* Right string */
+ );
+
/* Rich compare two strings and return one of the following:
- NULL in case an exception was raised
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (Revision 59424)
+++ Objects/unicodeobject.c (Arbeitskopie)
@@ -95,6 +95,9 @@
extern "C" {
#endif
+/* forward declarations */
+static long unicode_hash(PyUnicodeObject *);
+
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
@@ -6349,6 +6352,18 @@
return PyBool_FromLong(result);
}
+/* optimized version used by Objects/setobject.c */
+int
+_PyUnicode_Eq(PyObject *o1, PyObject *o2)
+{
+ PyUnicodeObject *a = (PyUnicodeObject*) o1;
+ PyUnicodeObject *b = (PyUnicodeObject*) o2;
+ return Py_Size(a) == Py_Size(b)
+ && unicode_hash(a) == unicode_hash(b)
+ && unicode_compare(a, b) == 0;
+}
+
+
int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
Index: Objects/setobject.c
===================================================================
--- Objects/setobject.c (Revision 59424)
+++ Objects/setobject.c (Arbeitskopie)
@@ -50,6 +50,20 @@
INIT_NONZERO_SET_SLOTS(so); \
} while(0)
+#define GET_HASH(hash, key) do { \
+ hash = -1; \
+ if (PyUnicode_CheckExact(key)) { \
+ hash = ((PyUnicodeObject *) key)->hash; \
+ } else if (PyString_CheckExact(key)) { \
+ hash = ((PyStringObject *) key)->ob_shash; \
+ } \
+ if (hash == -1) { \
+ hash = PyObject_Hash(key); \
+ if (hash == -1) \
+ return -1; \
+ } \
+ } while(0)
+
/* Reuse scheme to save calls to malloc, free, and memset */
#define MAXFREESETS 80
static PySetObject *free_sets[MAXFREESETS];
@@ -145,11 +159,11 @@
/*
* Hacked up version of set_lookkey which can assume keys are always strings;
- * This means we can always use _PyString_Eq directly and not have to check to
+ * This means we can always use _PyUnicode_Eq directly and not have to check to
* see if the comparison altered the table.
*/
static setentry *
-set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
{
register Py_ssize_t i;
register size_t perturb;
@@ -157,12 +171,19 @@
register size_t mask = so->mask;
setentry *table = so->table;
register setentry *entry;
+ int (*eq)(PyObject *, PyObject *);
/* Make sure this function doesn't have to handle non-string keys,
including subclasses of str; e.g., one reason to subclass
strings is to override __eq__, and for speed we don't cater to
that here. */
- if (!PyString_CheckExact(key)) {
+ if (PyUnicode_CheckExact(key)) {
+ eq = _PyUnicode_Eq;
+ }
+ else if (PyString_CheckExact(key)) {
+ eq = _PyString_Eq;
+ }
+ else {
so->lookup = set_lookkey;
return set_lookkey(so, key, hash);
}
@@ -173,7 +194,7 @@
if (entry->key == dummy)
freeslot = entry;
else {
- if (entry->hash == hash && _PyString_Eq(entry->key, key))
+ if (entry->hash == hash && eq(entry->key, key))
return entry;
freeslot = NULL;
}
@@ -188,7 +209,7 @@
if (entry->key == key
|| (entry->hash == hash
&& entry->key != dummy
- && _PyString_Eq(entry->key, key)))
+ && eq(entry->key, key)))
return entry;
if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
@@ -375,12 +396,8 @@
register long hash;
register Py_ssize_t n_used;
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
- hash = PyObject_Hash(key);
- if (hash == -1)
- return -1;
- }
+ GET_HASH(hash, key);
+
assert(so->fill <= so->mask); /* at least one empty slot */
n_used = so->used;
Py_INCREF(key);
@@ -422,12 +439,9 @@
PyObject *old_key;
assert (PyAnySet_Check(so));
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
- hash = PyObject_Hash(key);
- if (hash == -1)
- return -1;
- }
+
+ GET_HASH(hash, key);
+
entry = (so->lookup)(so, key, hash);
if (entry == NULL)
return -1;
@@ -668,12 +682,8 @@
long hash;
setentry *entry;
- if (!PyString_CheckExact(key) ||
- (hash = ((PyStringObject *) key)->ob_shash) == -1) {
- hash = PyObject_Hash(key);
- if (hash == -1)
- return -1;
- }
+ GET_HASH(hash, key);
+
entry = (so->lookup)(so, key, hash);
if (entry == NULL)
return -1;
@@ -989,7 +999,7 @@
INIT_NONZERO_SET_SLOTS(so);
}
- so->lookup = set_lookkey_string;
+ so->lookup = set_lookkey_unicode;
so->weakreflist = NULL;
if (iterable != NULL) {
Index: Lib/test/test_set.py
===================================================================
--- Lib/test/test_set.py (Revision 59424)
+++ Lib/test/test_set.py (Arbeitskopie)
@@ -817,6 +817,26 @@
self.length = 3
self.repr = None
+#------------------------------------------------------------------------------
+
+class TestBasicOpsString(TestBasicOps):
+ def setUp(self):
+ self.case = "string set"
+ self.values = ["a", "b", "c"]
+ self.set = set(self.values)
+ self.dup = set(self.values)
+ self.length = 3
+ self.repr = "{'a', 'c', 'b'}"
+
+class TestBasicOpsBytes(TestBasicOps):
+ def setUp(self):
+ self.case = "string set"
+ self.values = [b"a", b"b", b"c"]
+ self.set = set(self.values)
+ self.dup = set(self.values)
+ self.length = 3
+ self.repr = "{b'a', b'c', b'b'}"
+
#==============================================================================
def baditer():
@@ -1581,6 +1601,8 @@
TestBasicOpsSingleton,
TestBasicOpsTuple,
TestBasicOpsTriple,
+ TestBasicOpsString,
+ TestBasicOpsBytes,
TestBinaryOps,
TestUpdateOps,
TestMutate,
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com