here's that with comments but i gotta prepare for my day too [i'm just
spammin?]!


    # assumes `import tqdm` at module level for the progress bar below
    def update(self, keyhashitemsdict={}, **keyhashitemskws):
        '''insert many items at once'''
        updates = []
        spread = 0
        hashshift = self._hashshift
        hashbytes = self._hashbytes

        # collect the index and keyhash of each item and determine
        # whether the hashtable needs to be grown
        for keyhashitems in [keyhashitemsdict, keyhashitemskws]:
            for keyhash, item in keyhashitems.items():
                assert item != self._sentinel
                idx = int.from_bytes(keyhash[:self._hashbytes], 'big') >> self._hashshift
                place = self.array[idx]
                if place != self._sentinel:
                    collision = self._key(place)
                    if collision != keyhash:
                        assert idx == int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift
                        while (int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift
                               != int.from_bytes(collision[:hashbytes], 'big') >> hashshift):
                            spread += 1
                            hashbits = self._hashbits + spread
                            expansion = 1 << spread
                            hashbytes = (hashbits+7) >> 3
                            hashshift = (hashbytes << 3) - hashbits
                updates.append([idx, keyhash, item])

        # sort the items into a stack to be popped as used, hoping to
        # prevent doubling memory when used with large data on a small system
        updates.sort(reverse=True)

        if spread == 0:
            # no need to grow hashtable

            allocsize = self._rep._allocsize
            itemsz = self._itemsize

            # group the items into chunks of nearby indices (within one
            # allocation record of the previous item), to reduce how many
            # separate writes are needed
            update_chunks = [[updates.pop()]] if updates else []
            while len(updates):
                update = updates.pop()
                if (update[0] + 1 - update_chunks[-1][-1][0]) * itemsz >= allocsize:
                    update_chunks.append([update])
                else:
                    update_chunks[-1].append(update)

            # write each modified record
            for update_chunk in update_chunks:
                if len(update_chunk) == 1:
                    # only one item modified in this record, simply set it
                    idx, keyhash, item = update_chunk[0]
                    self.array[idx] = item
                else:
                    # many items modified: fetch the region covering the
                    # change and update the whole thing in one assignment
                    min_idx, min_keyhash, min_item = update_chunk[0]
                    max_idx, max_keyhash, max_item = update_chunk[-1]
                    content = [min_item] + self.array[min_idx+1:max_idx] + [max_item]
                    for idx, keyhash, item in update_chunk[1:-1]:
                        content[idx-min_idx] = item
                    self.array[min_idx:max_idx+1] = content

                # dereference the written items to minimize memory use in
                # case the operation is thrashing
                update_chunk[:] = []

        else:
            # there is an index collision. expand the hashtable.

            def content_generator():
                '''yield the items of the grown table for writing'''
                # updates pop in ascending idx order (sorted reverse above)
                def next_update():
                    return updates.pop() if updates else (float('inf'), None, None)
                expansionmask = expansion - 1
                next_idx, next_keyhash, next_item = next_update()
                for superidx, item in enumerate(tqdm.tqdm(
                        self.array, desc='growing sentinel hashtable', leave=False)):
                    update_chunk = []
                    while next_idx == superidx:
                        # the update's own keyhash decides where it lands
                        wholeidx = int.from_bytes(next_keyhash[:hashbytes], 'big')
                        assert superidx == wholeidx >> (hashbytes * 8 - self._hashbits)
                        subidx = (wholeidx >> hashshift) & expansionmask
                        assert superidx * expansion + subidx == wholeidx >> hashshift
                        update_chunk.append([subidx, next_keyhash, next_item])
                        next_idx, next_keyhash, next_item = next_update()
                    if item == self._sentinel:
                        # fill the section only with update information
                        pass  # WIP, a sketch follows below
                    else:
                        # insert the old item in any update information
                        pass  # WIP, a sketch follows below
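
those two branch bodies are the part that's still unwritten. a hedged
sketch of one way they might go (a guess, not settled code: it assumes
each old slot becomes `expansion` sentinel-filled slots and that an
update wins over the old item it replaces):

    if item == self._sentinel:
        # fill the section only with update information
        chunk = [self._sentinel] * expansion
    else:
        # insert the old item among the update information
        oldwhole = int.from_bytes(self._key(item)[:hashbytes], 'big')
        chunk = [self._sentinel] * expansion
        chunk[(oldwhole >> hashshift) & expansionmask] = item
    for subidx, upd_keyhash, upd_item in update_chunk:
        chunk[subidx] = upd_item
    yield from chunk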
>
> i had to open it in a gui editor for it to copy right.
> i'm working on the second else branch for spread > 0, implementing the
> wasteful big-endian high-bits expansion that preserves item order and
> keeps the underlying data clearer for a third party to reverse-engineer
> and independently implement an interface for. 'slave boss' keeps taking
> me over when i try to engage the branch, which is normal for me and i
> imagine many others. i've made progress!
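
a standalone toy of that high-bits grow, with invented names and plain
integer keyhashes instead of the class's byte hashes, just to show the
order-preserving index math:

    def grow(array, hashbits, spread, updates, hashwidth=8):
        '''each old slot superidx becomes 1 << spread adjacent new slots'''
        expansion = 1 << spread
        out = []
        for superidx, item in enumerate(array):
            chunk = [None] * expansion
            candidates = ([] if item is None else [item]) + updates.get(superidx, [])
            for keyhash in candidates:
                # slot by the top hashbits + spread bits of the hash, so
                # big-endian item order survives the grow
                subidx = (keyhash >> (hashwidth - hashbits - spread)) & (expansion - 1)
                chunk[subidx] = keyhash
            out += chunk
        return out

    old = [None, 0x55, None, 0xff]  # indexed by the top 2 bits of an 8-bit hash
    new = grow(old, hashbits=2, spread=1, updates={1: [0x70]})
    assert new == [None, None, 0x55, 0x70, None, None, None, 0xff]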
>
> i'm using the slower high-bits expansion because it is less
> complicating to keep the implementation the same during these issues.
> one thing at a time gives me more spaces to sneak coding in through my
> experiences.
>
> i'm presently confused around the code duplication of calculating the
> subidx and superidx in potentially three places: the old __setitem__
> function and the different potential approaches to inserting the old
> item among new items here. it doesn't immediately look easy to
> generalize into a concise, generally useful function (because it
> engages both class-local and scope-local data that hasn't been bundled
> together yet, and doesn't have use outside this function and its old
> implementation).
>
> a good next step might be to either quickly generalize that (even if
> the generalization is just a messy artefact of implementation) so as
> to make the work simpler and clearer, use less working memory, and
> open up more mental approaches, or to rote-copy the code into 3
> places. because i tend to insert errors, it's usually better to use
> the former approach, but there are a lot of associated values here
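
one rough bundling that might cover all three sites (names invented
here; it reuses the same big-endian math as the asserts above, and with
spread=0 it degrades to the plain index with subidx 0, so the old
__setitem__ could share it too):

    def _split_idx(self, keyhash, hashbytes, hashshift, spread):
        '''superidx into the old table, subidx within its grown slot'''
        wholeidx = int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift
        return wholeidx >> spread, wholeidx & ((1 << spread) - 1)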
