Hi,
I've been working on a malloc diff after being prompted by Fabien
Romano and Jonathan Armani that there are cases where malloc's
performance is quite bad. These cases occur when freeing lots (and I
mean lots) of small allocations in random or reverse order relative to
the order in which they were allocated, and it gets worse if there are
a lot of allocations of the same size.
The culprit is the linked list of chunk pages containing free chunks.
If a chunk page becomes empty (or full), work has to be done to
maintain the free chunk list. In both cases the free list is scanned
to find the proper spot.
One way of avoiding the scanning is to use a doubly linked list. It
will cost some space, but adding or removing chunk_info structs
becomes cheaper. I also removed the ordering of the list, since it
does not seem to serve a useful purpose (it might have been good in
the sbrk(2) days).
Now the average program does not suffer much from the above problem,
and it should not suffer much from the diff.
So I'd like people to test this diff, for both correctness and speed, so
I can confirm my feeling that the increase in space used is paying
off. I know it is worthwhile for the extreme cases, but is it
worthwhile for your average program?
As a bonus, I changed the code to use the queue macros, which makes
things easier to read.
Thanks again to Fabien and Jonathan for coming up with the test
program and the analysis of the problem.
-Otto
PS: afaik, this problem has been around since the early days of phk's
malloc.
Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.118
diff -u -p -r1.118 malloc.c
--- malloc.c 2 Nov 2009 19:26:17 -0000 1.118
+++ malloc.c 8 Nov 2009 20:01:51 -0000
@@ -32,6 +32,7 @@
#include <sys/types.h>
#include <sys/param.h>
+#include <sys/queue.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <errno.h>
@@ -100,9 +101,9 @@ struct dir_info {
size_t regions_bits; /* log2 of total */
size_t regions_free; /* number of free slots */
/* list of free chunk info structs */
- struct chunk_info *chunk_info_list;
+ LIST_HEAD(_chunk_head, chunk_info) chunk_info_list;
/* lists of chunks with free slots */
- struct chunk_info *chunk_dir[MALLOC_MAXSHIFT];
+ LIST_HEAD(chunk_head, chunk_info) chunk_dir[MALLOC_MAXSHIFT];
size_t free_regions_size; /* free pages cached */
/* free pages cache */
struct region_info free_regions[MALLOC_MAXCACHE];
@@ -135,7 +136,7 @@ struct dir_info {
*/
#define MALLOC_BITS (NBBY * sizeof(u_long))
struct chunk_info {
- struct chunk_info *next; /* next on the free list */
+ LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
u_int32_t canary;
u_short size; /* size of this page's chunks */
@@ -217,13 +218,13 @@ dump_chunk(int fd, struct chunk_info *p,
{
char buf[64];
- while (p) {
+ while (p != NULL) {
snprintf(buf, sizeof(buf), "chunk %d %d/%d %p\n", p->size,
p->free, p->total, p->page);
write(fd, buf, strlen(buf));
if (!fromfreelist)
break;
- p = p->next;
+ p = LIST_NEXT(p, entries);
if (p != NULL) {
snprintf(buf, sizeof(buf), " ");
write(fd, buf, strlen(buf));
@@ -240,7 +241,7 @@ dump_free_chunk_info(int fd, struct dir_
snprintf(buf, sizeof(buf), "Free chunk structs:\n");
write(fd, buf, strlen(buf));
for (i = 0; i < MALLOC_MAXSHIFT; i++) {
- struct chunk_info *p = d->chunk_dir[i];
+ struct chunk_info *p = LIST_FIRST(&d->chunk_dir[i]);
if (p != NULL) {
snprintf(buf, sizeof(buf), "%2d) ", i);
write(fd, buf, strlen(buf));
@@ -520,6 +521,8 @@ map(struct dir_info *d, size_t sz, int z
d->free_regions_size -= psz;
if (zero_fill)
memset(p, 0, sz);
+ else if (mopts.malloc_junk && mopts.malloc_freeprot)
+ memset(p, SOME_FREEJUNK, sz);
return p;
}
p = MMAP(sz);
@@ -714,8 +717,10 @@ omalloc_init(struct dir_info **dp)
d->regions_total = 0;
return 1;
}
+ LIST_INIT(&d->chunk_info_list);
+ for (i = 0; i < MALLOC_MAXSHIFT; i++)
+ LIST_INIT(&d->chunk_dir[i]);
malloc_used += regioninfo_size;
- memset(d->r, 0, regioninfo_size);
d->canary1 = mopts.malloc_canary ^ (u_int32_t)(uintptr_t)d;
d->canary2 = ~d->canary1;
@@ -787,31 +792,21 @@ alloc_chunk_info(struct dir_info *d)
struct chunk_info *p;
int i;
- if (d->chunk_info_list == NULL) {
+ if (LIST_EMPTY(&d->chunk_info_list)) {
p = MMAP(MALLOC_PAGESIZE);
if (p == MAP_FAILED)
return NULL;
malloc_used += MALLOC_PAGESIZE;
- for (i = 0; i < MALLOC_PAGESIZE / sizeof(*p); i++) {
- p[i].next = d->chunk_info_list;
- d->chunk_info_list = &p[i];
- }
+ for (i = 0; i < MALLOC_PAGESIZE / sizeof(*p); i++)
+ LIST_INSERT_HEAD(&d->chunk_info_list, &p[i], entries);
}
- p = d->chunk_info_list;
- d->chunk_info_list = p->next;
+ p = LIST_FIRST(&d->chunk_info_list);
+ LIST_REMOVE(p, entries);
memset(p, 0, sizeof *p);
p->canary = d->canary1;
return p;
}
-
-static void
-put_chunk_info(struct dir_info *d, struct chunk_info *p)
-{
- p->next = d->chunk_info_list;
- d->chunk_info_list = p;
-}
-
static int
insert(struct dir_info *d, void *p, size_t sz)
{
@@ -929,7 +924,7 @@ omalloc_make_chunks(struct dir_info *d,
k = mprotect(pp, MALLOC_PAGESIZE, PROT_NONE);
if (k < 0) {
unmap(d, pp, MALLOC_PAGESIZE);
- put_chunk_info(d, bp);
+ LIST_INSERT_HEAD(&d->chunk_info_list, bp, entries);
return NULL;
}
} else {
@@ -950,8 +945,7 @@ omalloc_make_chunks(struct dir_info *d,
for (; i < k; i++)
bp->bits[i / MALLOC_BITS] |= 1UL << (i % MALLOC_BITS);
- bp->next = d->chunk_dir[bits];
- d->chunk_dir[bits] = bp;
+ LIST_INSERT_HEAD(&d->chunk_dir[bits], bp, entries);
bits++;
if ((uintptr_t)pp & bits)
@@ -992,9 +986,12 @@ malloc_bytes(struct dir_info *d, size_t
}
/* If it's empty, make a page more of that size chunks */
- bp = d->chunk_dir[j];
- if (bp == NULL && (bp = omalloc_make_chunks(d, j)) == NULL)
- return NULL;
+ if (LIST_EMPTY(&d->chunk_dir[j])) {
+ bp = omalloc_make_chunks(d, j);
+ if (bp == NULL)
+ return NULL;
+ } else
+ bp = LIST_FIRST(&d->chunk_dir[j]);
if (bp->canary != d->canary1)
wrterror("chunk info corrupted");
@@ -1032,10 +1029,9 @@ malloc_bytes(struct dir_info *d, size_t
*lp ^= u;
/* If there are no more free, remove from free-list */
- if (!--bp->free) {
- d->chunk_dir[j] = bp->next;
- bp->next = NULL;
- }
+ if (!--bp->free)
+ LIST_REMOVE(bp, entries);
+
/* Adjust to the real offset of that chunk */
k += (lp - bp->bits) * MALLOC_BITS;
k <<= bp->shift;
@@ -1052,7 +1048,8 @@ malloc_bytes(struct dir_info *d, size_t
static void
free_bytes(struct dir_info *d, struct region_info *r, void *ptr)
{
- struct chunk_info *info, **mp;
+ struct chunk_head *mp;
+ struct chunk_info *info;
long i;
info = (struct chunk_info *)r->size;
@@ -1081,35 +1078,20 @@ free_bytes(struct dir_info *d, struct re
if (info->free == 1) {
/* Page became non-full */
-
- /* Insert in address order */
- while (*mp != NULL && (*mp)->next != NULL &&
- (*mp)->next->page < info->page)
- mp = &(*mp)->next;
- info->next = *mp;
- *mp = info;
+ LIST_INSERT_HEAD(mp, info, entries);
return;
}
if (info->free != info->total)
return;
- /* Find & remove this page in the queue */
- while (*mp != info) {
- mp = &((*mp)->next);
- if (!*mp) {
- wrterror("not on queue");
- errno = EFAULT;
- return;
- }
- }
- *mp = info->next;
+ LIST_REMOVE(info, entries);
if (info->size == 0 && !mopts.malloc_freeprot)
mprotect(info->page, MALLOC_PAGESIZE, PROT_READ | PROT_WRITE);
unmap(d, info->page, MALLOC_PAGESIZE);
delete(d, r);
- put_chunk_info(d, info);
+ LIST_INSERT_HEAD(&d->chunk_info_list, info, entries);
}