> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of
> rsanford2 at gmail.com
> Sent: Friday, May 16, 2014 1:59 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2] malloc: fix malloc and free linear complexity
> 
> Problems with lib rte_malloc:
>  1. Rte_malloc searches a heap's entire free list looking for
>     the best fit, resulting in linear complexity.
>  2. Heaps store free blocks in a singly-linked list, resulting
>     in linear complexity when rte_free needs to remove an
>     adjacent block.
>  3. The library inserts and removes free blocks with ad hoc,
>     in-line code, rather than using linked-list functions or
>     macros.
> 
> This patch addresses those problems as follows:
>  1. Replace single free list with a handful of free lists.
>     Each free list contains blocks of a specified size range,
>     for example:
>       list[0]: (0   , 2^7]
>       list[1]: (2^7 , 2^9]
>       list[2]: (2^9 , 2^11]
>       list[3]: (2^11, 2^13]
>       list[4]: (2^13, MAX_SIZE]
> 
>     When allocating a block, start at the first list that can
>     contain a big enough block. Search subsequent lists, if
>     necessary. Terminate the search as soon as we find a block
>     that is big enough.
>  2. Use doubly-linked lists, so that we can remove free blocks
>     in constant time.
>  3. Use BSD LIST macros, as defined in sys/queue.h and the
>     QUEUE(3) man page.
> 
> Signed-off-by: Robert Sanford <rsanford2 at gmail.com>
> ---
>  lib/librte_eal/common/include/rte_malloc_heap.h |    6 +-
>  lib/librte_malloc/malloc_elem.c                 |  121 
> +++++++++++++++--------
>  lib/librte_malloc/malloc_elem.h                 |   17 +++-
>  lib/librte_malloc/malloc_heap.c                 |   67 ++++++-------
>  4 files changed, 128 insertions(+), 83 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h
> b/lib/librte_eal/common/include/rte_malloc_heap.h
> index 5e139cf..1f5d653 100644
> --- a/lib/librte_eal/common/include/rte_malloc_heap.h
> +++ b/lib/librte_eal/common/include/rte_malloc_heap.h
> @@ -35,14 +35,18 @@
>  #define _RTE_MALLOC_HEAP_H_
> 
>  #include <stddef.h>
> +#include <sys/queue.h>
>  #include <rte_spinlock.h>
> 
> +/* Number of free lists per heap, grouped by size. */
> +#define RTE_HEAP_NUM_FREELISTS  5
> +
>  /**
>   * Structure to hold malloc heap
>   */
>  struct malloc_heap {
>       rte_spinlock_t lock;
> -     struct malloc_elem * volatile free_head;
> +     LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
>       unsigned mz_count;
>       unsigned alloc_count;
>       size_t total_size;
> diff --git a/lib/librte_malloc/malloc_elem.c b/lib/librte_malloc/malloc_elem.c
> index f0da640..13cd5d3 100644
> --- a/lib/librte_malloc/malloc_elem.c
> +++ b/lib/librte_malloc/malloc_elem.c
> @@ -33,6 +33,7 @@
>  #include <stdint.h>
>  #include <stddef.h>
>  #include <stdio.h>
> +#include <string.h>
>  #include <sys/queue.h>
> 
>  #include <rte_memory.h>
> @@ -60,7 +61,8 @@ malloc_elem_init(struct malloc_elem *elem,
>  {
>       elem->heap = heap;
>       elem->mz = mz;
> -     elem->prev = elem->next_free = NULL;
> +     elem->prev = NULL;
> +     memset(&elem->free_list, 0, sizeof(elem->free_list));
>       elem->state = ELEM_FREE;
>       elem->size = size;
>       elem->pad = 0;
> @@ -125,14 +127,71 @@ split_elem(struct malloc_elem *elem, struct
> malloc_elem *split_pt)
>  }
> 
>  /*
> + * Given an element size, compute its freelist index.
> + * We free an element into the freelist containing similarly-sized elements.
> + * We try to allocate elements starting with the freelist containing
> + * similarly-sized elements, and if necessary, we search freelists
> + * containing larger elements.
> + *
> + * Example element size ranges for a heap with five free lists:
> + *   heap->free_head[0] - (0   , 2^7]
> + *   heap->free_head[1] - (2^7 , 2^9]
> + *   heap->free_head[2] - (2^9 , 2^11]
> + *   heap->free_head[3] - (2^11, 2^13]
> + *   heap->free_head[4] - (2^13, MAX_SIZE]
> + */
> +size_t
> +malloc_elem_free_list_index(size_t size)
> +{
> +#define MALLOC_MINSIZE_LOG2   7
> +#define MALLOC_LOG2_INCREMENT 2
> +
> +     size_t log2;
> +     size_t index;
> +
> +     if (size <= (1UL << MALLOC_MINSIZE_LOG2))
> +             return 0;
> +
> +     /* Find next power of 2 >= size. */
> +     log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
> +
> +     /* Compute freelist index, based on log2(size). */
> +     index = (log2 - MALLOC_MINSIZE_LOG2 +
> MALLOC_LOG2_INCREMENT - 1) /
> +             MALLOC_LOG2_INCREMENT;
> +
> +     return (index <= RTE_HEAP_NUM_FREELISTS-1?
> +             index: RTE_HEAP_NUM_FREELISTS-1);
> +}
> +
> +/*
> + * Add the specified element to its heap's free list.
> + */
> +void
> +malloc_elem_free_list_insert(struct malloc_elem *elem)
> +{
> +     size_t idx = malloc_elem_free_list_index(elem->size -
> MALLOC_ELEM_HEADER_LEN);
> +
> +     elem->state = ELEM_FREE;
> +     LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
> +}
> +
> +/*
> + * Remove the specified element from its heap's free list.
> + */
> +static void
> +elem_free_list_remove(struct malloc_elem *elem)
> +{
> +     LIST_REMOVE(elem, free_list);
> +}
> +
> +/*
>   * reserve a block of data in an existing malloc_elem. If the malloc_elem
>   * is much larger than the data block requested, we split the element in two.
>   * This function is only called from malloc_heap_alloc so parameter checking
>   * is not done here, as it's done there previously.
>   */
>  struct malloc_elem *
> -malloc_elem_alloc(struct malloc_elem *elem, size_t size,
> -             unsigned align, struct malloc_elem *prev_free)
> +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
>  {
>       struct malloc_elem *new_elem = elem_start_pt(elem, size, align);
>       const unsigned old_elem_size = (uintptr_t)new_elem -
> (uintptr_t)elem;
> @@ -151,20 +210,20 @@ malloc_elem_alloc(struct malloc_elem *elem,
> size_t size,
>                       set_header(new_elem);
>               }
>               /* remove element from free list */
> -             if (prev_free == NULL)
> -                     elem->heap->free_head = elem->next_free;
> -             else
> -                     prev_free->next_free = elem->next_free;
> +             elem_free_list_remove(elem);
> 
>               return new_elem;
>       }
> 
>       /* we are going to split the element in two. The original element
> -      * remains free, and the new element is the one allocated, so no free
> list
> -      * changes need to be made.
> +      * remains free, and the new element is the one allocated.
> +      * Re-insert original element, in case its new size makes it
> +      * belong on a different list.
>        */
> +     elem_free_list_remove(elem);
>       split_elem(elem, new_elem);
>       new_elem->state = ELEM_BUSY;
> +     malloc_elem_free_list_insert(elem);
> 
>       return new_elem;
>  }
> @@ -182,25 +241,6 @@ join_elem(struct malloc_elem *elem1, struct
> malloc_elem *elem2)
>  }
> 
>  /*
> - * scan the free list, and remove the request element from that
> - * free list. (Free list to scan is got from heap pointer in element)
> - */
> -static inline void
> -remove_from_free_list(struct malloc_elem *elem)
> -{
> -     if (elem == elem->heap->free_head)
> -             elem->heap->free_head = elem->next_free;
> -     else{
> -             struct malloc_elem *prev_free = elem->heap->free_head;
> -             while (prev_free && prev_free->next_free != elem)
> -                     prev_free = prev_free->next_free;
> -             if (!prev_free)
> -                     rte_panic("Corrupted free list\n");
> -             prev_free->next_free = elem->next_free;
> -     }
> -}
> -
> -/*
>   * free a malloc_elem block by adding it to the free list. If the
>   * blocks either immediately before or immediately after newly freed block
>   * are also free, the blocks are merged together.
> @@ -214,21 +254,22 @@ malloc_elem_free(struct malloc_elem *elem)
>       rte_spinlock_lock(&(elem->heap->lock));
>       struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
>       if (next->state == ELEM_FREE){
> -             /* join to this one, and remove from free list */
> +             /* remove from free list, join to this one */
> +             elem_free_list_remove(next);
>               join_elem(elem, next);
> -             remove_from_free_list(next);
>       }
> 
>       /* check if previous element is free, if so join with it and return,
> -      * no need to update free list, as that element is already there
> +      * need to re-insert in free list, as that element's size is changing
>        */
> -     if (elem->prev != NULL && elem->prev->state == ELEM_FREE)
> +     if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
> +             elem_free_list_remove(elem->prev);
>               join_elem(elem->prev, elem);
> +             malloc_elem_free_list_insert(elem->prev);
> +     }
>       /* otherwise add ourselves to the free list */
>       else {
> -             elem->next_free = elem->heap->free_head;
> -             elem->heap->free_head = elem;
> -             elem->state = ELEM_FREE;
> +             malloc_elem_free_list_insert(elem);
>               elem->pad = 0;
>       }
>       /* decrease heap's count of allocated elements */
> @@ -258,20 +299,18 @@ malloc_elem_resize(struct malloc_elem *elem,
> size_t size)
>       if (current_size + next->size < new_size)
>               goto err_return;
> 
> -     /* we now know the element fits, so join the two, then remove from
> free
> -      * list
> +     /* we now know the element fits, so remove from free list,
> +      * join the two
>        */
> +     elem_free_list_remove(next);
>       join_elem(elem, next);
> -     remove_from_free_list(next);
> 
>       if (elem->size - new_size > MIN_DATA_SIZE +
> MALLOC_ELEM_OVERHEAD){
>               /* now we have a big block together. Lets cut it down a bit,
> by splitting */
>               struct malloc_elem *split_pt = RTE_PTR_ADD(elem,
> new_size);
>               split_pt = RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE);
>               split_elem(elem, split_pt);
> -             split_pt->state = ELEM_FREE;
> -             split_pt->next_free = elem->heap->free_head;
> -             elem->heap->free_head = split_pt;
> +             malloc_elem_free_list_insert(split_pt);
>       }
>       rte_spinlock_unlock(&elem->heap->lock);
>       return 0;
> diff --git a/lib/librte_malloc/malloc_elem.h
> b/lib/librte_malloc/malloc_elem.h
> index eadecf9..63c69e1 100644
> --- a/lib/librte_malloc/malloc_elem.h
> +++ b/lib/librte_malloc/malloc_elem.h
> @@ -46,7 +46,7 @@ enum elem_state {
>  struct malloc_elem {
>       struct malloc_heap *heap;
>       struct malloc_elem *volatile prev;      /* points to prev elem in
> memzone */
> -     struct malloc_elem *volatile next_free; /* to make list of free
> elements */
> +     LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap
> */
>       const struct rte_memzone *mz;
>       volatile enum elem_state state;
>       uint32_t pad;
> @@ -156,8 +156,7 @@ malloc_elem_can_hold(struct malloc_elem *elem,
> size_t size, unsigned align);
>   * is much larger than the data block requested, we split the element in two.
>   */
>  struct malloc_elem *
> -malloc_elem_alloc(struct malloc_elem *elem, size_t size,
> -             unsigned align, struct malloc_elem *prev_free);
> +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);
> 
>  /*
>   * free a malloc_elem block by adding it to the free list. If the
> @@ -174,4 +173,16 @@ malloc_elem_free(struct malloc_elem *elem);
>  int
>  malloc_elem_resize(struct malloc_elem *elem, size_t size);
> 
> +/*
> + * Given an element size, compute its freelist index.
> + */
> +size_t
> +malloc_elem_free_list_index(size_t size);
> +
> +/*
> + * Add element to its heap's free list.
> + */
> +void
> +malloc_elem_free_list_insert(struct malloc_elem *elem);
> +
>  #endif /* MALLOC_ELEM_H_ */
> diff --git a/lib/librte_malloc/malloc_heap.c b/lib/librte_malloc/malloc_heap.c
> index 882749c..cfc02f1 100644
> --- a/lib/librte_malloc/malloc_heap.c
> +++ b/lib/librte_malloc/malloc_heap.c
> @@ -114,9 +114,8 @@ malloc_heap_add_memzone(struct malloc_heap
> *heap, size_t size, unsigned align)
>       const unsigned elem_size = (uintptr_t)end_elem -
> (uintptr_t)start_elem;
>       malloc_elem_init(start_elem, heap, mz, elem_size);
>       malloc_elem_mkend(end_elem, start_elem);
> +     malloc_elem_free_list_insert(start_elem);
> 
> -     start_elem->next_free = heap->free_head;
> -     heap->free_head = start_elem;
>       /* increase heap total size by size of new memzone */
>       heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD;
>       return 0;
> @@ -125,35 +124,25 @@ malloc_heap_add_memzone(struct malloc_heap
> *heap, size_t size, unsigned align)
>  /*
>   * Iterates through the freelist for a heap to find a free element
>   * which can store data of the required size and with the requested
> alignment.
> - * Returns null on failure, or pointer to element on success, with the 
> pointer
> - * to the previous element in the list, if any, being returned in a parameter
> - * (to make removing the element from the free list faster).
> + * Returns null on failure, or pointer to element on success.
>   */
>  static struct malloc_elem *
> -find_suitable_element(struct malloc_heap *heap, size_t size,
> -             unsigned align, struct malloc_elem **prev)
> +find_suitable_element(struct malloc_heap *heap, size_t size, unsigned
> align)
>  {
> -     struct malloc_elem *elem, *min_elem, *min_prev;
> -     size_t min_sz;
> -
> -     elem = heap->free_head;
> -     min_elem = NULL;
> -     min_prev = NULL;
> -     min_sz = (size_t) SIZE_MAX;
> -     *prev = NULL;
> -
> -     while(elem){
> -             if (malloc_elem_can_hold(elem, size, align)) {
> -                     if (min_sz > elem->size) {
> -                             min_elem = elem;
> -                             *prev = min_prev;
> -                             min_sz = elem->size;
> -                     }
> +     size_t idx;
> +     struct malloc_elem *elem;
> +
> +     for (idx = malloc_elem_free_list_index(size);
> +             idx < RTE_HEAP_NUM_FREELISTS; idx++)
> +     {
> +             for (elem = LIST_FIRST(&heap->free_head[idx]);
> +                     !!elem; elem = LIST_NEXT(elem, free_list))
> +             {
> +                     if (malloc_elem_can_hold(elem, size, align))
> +                             return elem;
>               }
> -             min_prev = elem;
> -             elem = elem->next_free;
>       }
> -     return (min_elem);
> +     return NULL;
>  }
> 
>  /*
> @@ -169,15 +158,14 @@ malloc_heap_alloc(struct malloc_heap *heap,
>       size = CACHE_LINE_ROUNDUP(size);
>       align = CACHE_LINE_ROUNDUP(align);
>       rte_spinlock_lock(&heap->lock);
> -     struct malloc_elem *prev, *elem = find_suitable_element(heap,
> -                     size, align, &prev);
> +     struct malloc_elem *elem = find_suitable_element(heap, size, align);
>       if (elem == NULL){
>               if ((malloc_heap_add_memzone(heap, size, align)) == 0)
> -                     elem = find_suitable_element(heap, size, align,
> &prev);
> +                     elem = find_suitable_element(heap, size, align);
>       }
> 
>       if (elem != NULL){
> -             elem = malloc_elem_alloc(elem, size, align, prev);
> +             elem = malloc_elem_alloc(elem, size, align);
>               /* increase heap's count of allocated elements */
>               heap->alloc_count++;
>       }
> @@ -193,7 +181,8 @@ int
>  malloc_heap_get_stats(const struct malloc_heap *heap,
>               struct rte_malloc_socket_stats *socket_stats)
>  {
> -     struct malloc_elem *elem = heap->free_head;
> +     size_t idx;
> +     struct malloc_elem *elem;
> 
>       /* Initialise variables for heap */
>       socket_stats->free_count = 0;
> @@ -201,13 +190,15 @@ malloc_heap_get_stats(const struct malloc_heap
> *heap,
>       socket_stats->greatest_free_size = 0;
> 
>       /* Iterate through free list */
> -     while(elem) {
> -             socket_stats->free_count++;
> -             socket_stats->heap_freesz_bytes += elem->size;
> -             if (elem->size > socket_stats->greatest_free_size)
> -                     socket_stats->greatest_free_size = elem->size;
> -
> -             elem = elem->next_free;
> +     for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
> +             for (elem = LIST_FIRST(&heap->free_head[idx]);
> +                     !!elem; elem = LIST_NEXT(elem, free_list))
> +             {
> +                     socket_stats->free_count++;
> +                     socket_stats->heap_freesz_bytes += elem->size;
> +                     if (elem->size > socket_stats->greatest_free_size)
> +                             socket_stats->greatest_free_size = elem-
> >size;
> +             }
>       }
>       /* Get stats on overall heap and allocated memory on this heap */
>       socket_stats->heap_totalsz_bytes = heap->total_size;
> --
> 1.7.1

Hi Robert,

Overall patch looks OK, but malloc unit tests fail on the last test 
(test_multi_alloc_statistics). Apparently, the biggest free chunk size changes 
as it allocates some memory, whereas in the previous implementation, this size 
did not change. I wonder if unit test is wrong or if there is actually an issue 
here. Could you look at this as well?

Thanks,
Pablo

Reply via email to