Marc-Andre Lemburg <m...@egenix.com> added the comment:
Here's a version of the collision counting patch that takes both hash
and slot collisions into account.
I've also added a test script which demonstrates both types of
collisions using integer objects (since it's trivial to calculate
their hashes).
To see the collision counting, enable the DEBUG_DICT_COLLISIONS
macro variable.
----------
Added file: http://bugs.python.org/file24299/hash-attack-3.patch
Added file: http://bugs.python.org/file24300/integercollision.py
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue13703>
_______________________________________
Index: Objects/dictobject.c
===================================================================
--- Objects/dictobject.c (revision 88933)
+++ Objects/dictobject.c (working copy)
@@ -9,7 +9,13 @@
#include "Python.h"
+/* Maximum number of allowed collisions. */
+#define Py_MAX_DICT_HASH_COLLISIONS 1000
+#define Py_MAX_DICT_SLOT_COLLISIONS 1000
+/* Debug collision detection */
+#define DEBUG_DICT_COLLISIONS 0
+
/* Set a key error with the specified argument, wrapping it in a
* tuple automatically so that tuple keys are not unpacked as the
* exception arguments. */
@@ -327,6 +333,7 @@
register PyDictEntry *ep;
register int cmp;
PyObject *startkey;
+ size_t hash_collisions, slot_collisions;
i = (size_t)hash & mask;
ep = &ep0[i];
@@ -361,6 +368,8 @@
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
+ hash_collisions = 1;
+ slot_collisions = 1;
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask];
@@ -387,9 +396,27 @@
*/
return lookdict(mp, key, hash);
}
+ #if DEBUG_DICT_COLLISIONS
+ printf("hash collisions = %zu (i=%zu)\n", hash_collisions, i);
+ #endif
+ if (++hash_collisions > Py_MAX_DICT_HASH_COLLISIONS) {
+ PyErr_SetString(PyExc_KeyError,
+ "too many hash collisions");
+ return NULL;
+ }
}
- else if (ep->me_key == dummy && freeslot == NULL)
- freeslot = ep;
+ else {
+ if (ep->me_key == dummy && freeslot == NULL)
+ freeslot = ep;
+ #if DEBUG_DICT_COLLISIONS
+ printf("slot collisions = %zu (i=%zu)\n", slot_collisions, i);
+ #endif
+ if (++slot_collisions > Py_MAX_DICT_SLOT_COLLISIONS) {
+ PyErr_SetString(PyExc_KeyError,
+ "too many slot collisions");
+ return NULL;
+ }
+ }
}
assert(0); /* NOT REACHED */
return 0;
@@ -413,6 +440,7 @@
register size_t mask = (size_t)mp->ma_mask;
PyDictEntry *ep0 = mp->ma_table;
register PyDictEntry *ep;
+ size_t hash_collisions, slot_collisions;
/* Make sure this function doesn't have to handle non-string keys,
including subclasses of str; e.g., one reason to subclass
@@ -439,18 +467,39 @@
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
+ hash_collisions = 1;
+ slot_collisions = 1;
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask];
if (ep->me_key == NULL)
return freeslot == NULL ? ep : freeslot;
- if (ep->me_key == key
- || (ep->me_hash == hash
- && ep->me_key != dummy
- && _PyString_Eq(ep->me_key, key)))
+ if (ep->me_key == key)
return ep;
- if (ep->me_key == dummy && freeslot == NULL)
- freeslot = ep;
+ if (ep->me_hash == hash && ep->me_key != dummy) {
+ if (_PyString_Eq(ep->me_key, key))
+ return ep;
+ #if DEBUG_DICT_COLLISIONS
+ printf("hash collisions = %zu (i=%zu)\n", hash_collisions, i);
+ #endif
+ if (++hash_collisions > Py_MAX_DICT_HASH_COLLISIONS) {
+ PyErr_SetString(PyExc_KeyError,
+ "too many hash collisions");
+ return NULL;
+ }
+ }
+ else {
+ if (ep->me_key == dummy && freeslot == NULL)
+ freeslot = ep;
+ #if DEBUG_DICT_COLLISIONS
+ printf("slot collisions = %zu (i=%zu)\n", slot_collisions, i);
+ #endif
+ if (++slot_collisions > Py_MAX_DICT_SLOT_COLLISIONS) {
+ PyErr_SetString(PyExc_KeyError,
+ "too many slot collisions");
+ return NULL;
+ }
+ }
}
assert(0); /* NOT REACHED */
return 0;
def integer_hash_collisions(max=1000):
print 'Hash collision attack:'
d = {}
for x in xrange(max):
i = x*(2**64 - 1) + 1
#print 'adding %i with hash %i' % (i, hash(i))
d[i] = 1
print 'generated dict with %i items' % len(d)
return d
def integer_slot_collisions(max=1000):
print 'Slot collision attack:'
# Fill the dict slots starting at (hash) position 1
d = {1:1}
i = 1
perturb = i
print 'seeding the dict:'
for x in xrange(max):
i = ((i << 2) + i + perturb + 1) & 0xffffffffffffffff
#print 'adding %i with hash %i' % (i, hash(i))
d[i] = 1
perturb >>= 5
# Add new keys
print 'generating slot collisions:'
# cause a hash collision on the first try to enter the
# prepared slot collision path
i = 1*(2**64 - 1) + 1
for x in xrange(max):
#print 'adding %i with hash %i' % (i, hash(i))
d[i] = 1
print 'generated dict with %i items' % len(d)
return d
integer_hash_collisions(1000)
integer_slot_collisions(1000)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com