> here's the crash now:
>
> Traceback (most recent call last):
> File
> "/nix/store/f2krmq3iv5nibcvn4rw7nrnrciqprdkh-python3-3.12.9/lib/python3.12/pdb.py",
> line 1960, in main
> pdb._run(target)
> File
> "/nix/store/f2krmq3iv5nibcvn4rw7nrnrciqprdkh-python3-3.12.9/lib/python3.12/pdb.py",
> line 1754, in _run
> self.run(target.code)
> File
> "/nix/store/f2krmq3iv5nibcvn4rw7nrnrciqprdkh-python3-3.12.9/lib/python3.12/bdb.py",
> line 627, in run
> exec(cmd, globals, locals)
> File "/home/karl3/projects/rep/rep/dict.py", line 394, in <module>
> doc.update([[val,val]])
> File "/home/karl3/projects/rep/rep/dict.py", line 349, in update
> super().update(keyhashitems())
> File "/home/karl3/projects/rep/rep/dict.py", line 164, in update
> assert int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift == newidx
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> AssertionError
>
>
>
> for keyhash, item in keyhashitems:
>
> assert item
> != self._sentinel
>
> byteidx =
> int.from_bytes(keyhash[:hashbytes], 'big')
>
> newidx = byteidx >> hashshift
>
>
> if self._capacity > 0:
>
> # this
> block checks for collision with previous stored values
>
> if capacity > self._capacity:
>
>
> superidx = int.from_bytes(keyhash[:self._hashbytes],
> 'big') >> self._hashshift
> else:
>
>
> superidx = newidx
>
>
> place = self.array[superidx]
>
> if place !=
> self._sentinel:
>
> collision = self._key(place)
>
>
> if collision != keyhash:
>
> assert
> superidx == int.from_bytes(collision[:self._hashbytes], 'big') >>
> self._hashshift
> updates[newidx] =
> [collision, place, False]
>
> # this separated approach to checking for collisions
> allows for accepting
> #
> batched data that ends up containing hash collisions solely within
> itself
> placing =
> updates.get(newidx)
> if placing is not None:
> collision, place, is_new = placing
> while newidx == int.from_bytes(collision[:hashbytes],
> 'big') >> hashshift:
> capacity <<= 1
> expansion <<= 1
> #spread += 1
> #hashbits = self._hashbits + spread
> hashbits += 1
> hashbytes = (hashbits+7) >> 3
> hashshift = (hashbytes << 3) - hashbits
> byteidx = int.from_bytes(keyhash[:hashbytes], 'big')
> newidx = byteidx >> hashshift
> assert capacity == (1 << hashbits)
> new_updates = {}
> for keyhash, item, is_new in updates.values():
> if is_new:
> newnewidx =
> int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift
> assert newnewidx not in new_updates
> new_updates[newnewidx] = [keyhash, item, True]
>
> updates
> = new_updates
> assert newidx not in updates
> assert int.from_bytes(keyhash[:hashbytes], 'big') >>
> hashshift == newidx
> updates[newidx] = [keyhash, item, True]
>
>
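> it looks like the problem is that keyhash (and item) get rebound by the inner
> `for keyhash, item, is_new in updates.values()` loop at the end, so the
> assert and the `updates[newidx] = [keyhash, item, True]` that follow it see
> the last rehashed entry instead of the one being inserted :D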
Now I've got this:
File "/home/karl3/projects/rep/rep/dict.py", line 220, in content_generator
assert superidx * expansion + subidx ==
int.from_bytes(dbg_keyhash[:hashbytes], 'big') >> hashshift
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Uncaught exception. Entering post mortem debugging
Running 'cont' or 'step' will restart the program
> /home/karl3/projects/rep/rep/dict.py(220)content_generator()
-> assert superidx * expansion + subidx ==
int.from_bytes(dbg_keyhash[:hashbytes], 'big') >> hashshift
(Pdb) p item
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
But that's just because item is the sentinel (the all-zero value printed
above), which isn't expected to hash to its index; the assertion can check
for the sentinel and skip it.