On Mon, May 02, 2011 at 10:09:29AM +0200, Otto Moerbeek wrote:
> Hi,
>
> Currently, malloc scans the bits of the chunk bitmap from position
> zero, skipping a random number (n) of free slots and then picking the
> next free one. This slows things down, especially if the number of
> full slots diminishes.
>
>
> This diff changes the scannning to start at a random position in the
> bitmap and then taking the first available free slot, wrapping if the
> end of the bitmap is reached. Of course we'll still scan more if the
> bitmap becomes more full, but the extra iterations skipping free slots
> and then some full slots are avoided.
>
> The random number is derived from a global, which is incremented by a
> few bits every time a chunk is needed (with a small optimization if
> only one free slot is left).
>
> The shows good speedups, especially if lots of small chunks are
> allocated and on e.g. loongson which uses a larger page size than
> others, while randomness of chunk allocation is preserved.
>
> Please test & review,
Litle feedback so far. I'd like to move on with this, I have more
goodies queued up. So get this on your machines!
-Otto
>
> Index: malloc.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 malloc.c
> --- malloc.c 30 Apr 2011 15:46:46 -0000 1.129
> +++ malloc.c 2 May 2011 07:27:54 -0000
> @@ -113,6 +113,7 @@ struct dir_info {
> struct region_info free_regions[MALLOC_MAXCACHE];
> /* delayed free chunk slots */
> void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1];
> + u_short chunk_start;
> #ifdef MALLOC_STATS
> size_t inserts;
> size_t insert_collisions;
> @@ -1013,40 +1014,31 @@ malloc_bytes(struct dir_info *d, size_t
>
> if (bp->canary != d->canary1)
> wrterror("chunk info corrupted", NULL);
> - /* Find first word of bitmap which isn't empty */
> - for (lp = bp->bits; !*lp; lp++)
> - /* EMPTY */;
> -
> - /* Find that bit, and tweak it */
> - u = 1;
> - k = 0;
> - while (!(*lp & u)) {
> - u += u;
> - k++;
> - }
> -
> - /* advance a random # of positions */
> - if (bp->free > 1) {
> - i = getrnibble() % bp->free;
> - while (i > 0) {
> - u += u;
> - k++;
> - if (k >= MALLOC_BITS) {
> - while (!*++lp)
> - /* EMPTY */;
> - u = 1;
> - k = 0;
> - if (lp - bp->bits > (bp->total - 1) /
> - MALLOC_BITS) {
> - wrterror("chunk overflow", NULL);
> - errno = EFAULT;
> - return (NULL);
> - }
> - }
> - if (*lp & u)
> - i--;
> +
> + i = d->chunk_start;
> + if (bp->free > 1)
> + i += getrnibble();
> + if (i >= bp->total)
> + i &= bp->total - 1;
> + for (;;) {
> + for (;;) {
> + lp = &bp->bits[i / MALLOC_BITS];
> + if (!*lp) {
> + i += MALLOC_BITS;
> + i &= ~(MALLOC_BITS - 1);
> + if (i >= bp->total)
> + i = 0;
> + } else
> + break;
> }
> + k = i % MALLOC_BITS;
> + u = 1 << k;
> + if (*lp & u)
> + break;
> + if (++i >= bp->total)
> + i = 0;
> }
> + d->chunk_start += i + 1;
>
> *lp ^= u;