The EAL malloc layer assumed that the content of all free elements is filled with zeros ("clean"), as opposed to uninitialized ("dirty"). This assumption was ensured in two ways: 1. The EAL memalloc layer always returned clean memory. 2. Freed memory was cleared before being returned to the heap.
Clearing the memory can be as slow as around 14 GiB/s. To avoid this cost, the memalloc layer is allowed to return dirty memory. Such segments are marked with RTE_MEMSEG_FLAG_DIRTY. The allocator tracks elements that contain dirty memory using the new flag in the element header. When clean memory is requested via rte_zmalloc*() and the suitable element is dirty, it is cleared on allocation. When memory is deallocated, the freed element is joined with adjacent free elements, and the dirty flag is updated as follows. If the joint element is dirty, there is no need to clean the freed memory: dirty + freed + dirty = dirty; freed + dirty = dirty. If the joint element is clean, the freed memory must be cleared: clean + freed + clean = clean; clean + freed = clean; freed + clean = clean; freed = clean. As a result, memory is either cleared on free, as before, or it will be cleared on allocation if need be, but never twice. Signed-off-by: Dmitry Kozlyuk <dkozl...@nvidia.com> --- lib/eal/common/malloc_elem.c | 22 +++++++++++++++++++--- lib/eal/common/malloc_elem.h | 11 +++++++++-- lib/eal/common/malloc_heap.c | 18 ++++++++++++------ lib/eal/common/rte_malloc.c | 21 ++++++++++++++------- lib/eal/include/rte_memory.h | 8 ++++++-- 5 files changed, 60 insertions(+), 20 deletions(-) diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c index bdd20a162e..e04e0890fb 100644 --- a/lib/eal/common/malloc_elem.c +++ b/lib/eal/common/malloc_elem.c @@ -129,7 +129,7 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) void malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, struct rte_memseg_list *msl, size_t size, - struct malloc_elem *orig_elem, size_t orig_size) + struct malloc_elem *orig_elem, size_t orig_size, bool dirty) { elem->heap = heap; elem->msl = msl; @@ -137,6 +137,7 @@ malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, elem->next = NULL; memset(&elem->free_list, 0, sizeof(elem->free_list)); elem->state = ELEM_FREE; + elem->dirty = dirty; elem->size = size; elem->pad = 0; elem->orig_elem = 
orig_elem; @@ -300,7 +301,7 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) const size_t new_elem_size = elem->size - old_elem_size; malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size, - elem->orig_elem, elem->orig_size); + elem->orig_elem, elem->orig_size, elem->dirty); split_pt->prev = elem; split_pt->next = next_elem; if (next_elem) @@ -506,6 +507,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) else elem1->heap->last = elem1; elem1->next = next; + elem1->dirty |= elem2->dirty; if (elem1->pad) { struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad); inner->size = elem1->size - elem1->pad; @@ -579,6 +581,14 @@ malloc_elem_free(struct malloc_elem *elem) ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); data_len = elem->size - MALLOC_ELEM_OVERHEAD; + /* + * Consider the element clean for the purposes of joining. + * If both neighbors are clean or non-existent, + * the joint element will be clean, + * which means the memory should be cleared. + * There is no need to clear the memory if the joint element is dirty. + */ + elem->dirty = false; elem = malloc_elem_join_adjacent_free(elem); malloc_elem_free_list_insert(elem); @@ -588,8 +598,14 @@ malloc_elem_free(struct malloc_elem *elem) /* decrease heap's count of allocated elements */ elem->heap->alloc_count--; - /* poison memory */ +#ifndef RTE_MALLOC_DEBUG + /* Normally clear the memory when needed. */ + if (!elem->dirty) + memset(ptr, 0, data_len); +#else + /* Always poison the memory in debug mode. 
*/ memset(ptr, MALLOC_POISON, data_len); +#endif return elem; } diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h index 15d8ba7af2..f2aa98821b 100644 --- a/lib/eal/common/malloc_elem.h +++ b/lib/eal/common/malloc_elem.h @@ -27,7 +27,13 @@ struct malloc_elem { LIST_ENTRY(malloc_elem) free_list; /**< list of free elements in heap */ struct rte_memseg_list *msl; - volatile enum elem_state state; + /** Element state, @c dirty and @c pad validity depends on it. */ + /* An extra bit is needed to represent enum elem_state as signed int. */ + enum elem_state state : 3; + /** If state == ELEM_FREE: the memory is not filled with zeroes. */ + uint32_t dirty : 1; + /** Reserved for future use. */ + uint32_t reserved : 28; uint32_t pad; size_t size; struct malloc_elem *orig_elem; @@ -320,7 +326,8 @@ malloc_elem_init(struct malloc_elem *elem, struct rte_memseg_list *msl, size_t size, struct malloc_elem *orig_elem, - size_t orig_size); + size_t orig_size, + bool dirty); void malloc_elem_insert(struct malloc_elem *elem); diff --git a/lib/eal/common/malloc_heap.c b/lib/eal/common/malloc_heap.c index 55aad2711b..24080fc473 100644 --- a/lib/eal/common/malloc_heap.c +++ b/lib/eal/common/malloc_heap.c @@ -93,11 +93,11 @@ malloc_socket_to_heap_id(unsigned int socket_id) */ static struct malloc_elem * malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, - void *start, size_t len) + void *start, size_t len, bool dirty) { struct malloc_elem *elem = start; - malloc_elem_init(elem, heap, msl, len, elem, len); + malloc_elem_init(elem, heap, msl, len, elem, len, dirty); malloc_elem_insert(elem); @@ -135,7 +135,8 @@ malloc_add_seg(const struct rte_memseg_list *msl, found_msl = &mcfg->memsegs[msl_idx]; - malloc_heap_add_memory(heap, found_msl, ms->addr, len); + malloc_heap_add_memory(heap, found_msl, ms->addr, len, + ms->flags & RTE_MEMSEG_FLAG_DIRTY); heap->total_size += len; @@ -303,7 +304,8 @@ alloc_pages_on_heap(struct malloc_heap *heap, 
uint64_t pg_sz, size_t elt_size, struct rte_memseg_list *msl; struct malloc_elem *elem = NULL; size_t alloc_sz; - int allocd_pages; + int allocd_pages, i; + bool dirty = false; void *ret, *map_addr; alloc_sz = (size_t)pg_sz * n_segs; @@ -372,8 +374,12 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, goto fail; } + /* Element is dirty if it contains at least one dirty page. */ + for (i = 0; i < allocd_pages; i++) + dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY; + /* add newly minted memsegs to malloc heap */ - elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); + elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty); /* try once more, as now we have allocated new memory */ ret = find_suitable_element(heap, elt_size, flags, align, bound, @@ -1260,7 +1266,7 @@ malloc_heap_add_external_memory(struct malloc_heap *heap, memset(msl->base_va, 0, msl->len); /* now, add newly minted memory to the malloc heap */ - malloc_heap_add_memory(heap, msl, msl->base_va, msl->len); + malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false); heap->total_size += msl->len; diff --git a/lib/eal/common/rte_malloc.c b/lib/eal/common/rte_malloc.c index d0bec26920..71a3f7ecb4 100644 --- a/lib/eal/common/rte_malloc.c +++ b/lib/eal/common/rte_malloc.c @@ -115,15 +115,22 @@ rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket) { void *ptr = rte_malloc_socket(type, size, align, socket); + if (ptr != NULL) { + struct malloc_elem *elem = malloc_elem_from_data(ptr); + + if (elem->dirty) { + memset(ptr, 0, size); + } else { #ifdef RTE_MALLOC_DEBUG - /* - * If DEBUG is enabled, then freed memory is marked with poison - * value and set to zero on allocation. - * If DEBUG is not enabled then memory is already zeroed. - */ - if (ptr != NULL) - memset(ptr, 0, size); + /* + * If DEBUG is enabled, then freed memory is marked + * with a poison value and set to zero on allocation. 
+ * If DEBUG is disabled then memory is already zeroed. + */ + memset(ptr, 0, size); #endif + } + } rte_eal_trace_mem_zmalloc(type, size, align, socket, ptr); return ptr; diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h index 6d018629ae..68b069fd04 100644 --- a/lib/eal/include/rte_memory.h +++ b/lib/eal/include/rte_memory.h @@ -19,6 +19,7 @@ extern "C" { #endif +#include <rte_bitops.h> #include <rte_common.h> #include <rte_compat.h> #include <rte_config.h> @@ -37,11 +38,14 @@ extern "C" { #define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ +/** Prevent this segment from being freed back to the OS. */ +#define RTE_MEMSEG_FLAG_DO_NOT_FREE RTE_BIT32(0) +/** This segment is not filled with zeros. */ +#define RTE_MEMSEG_FLAG_DIRTY RTE_BIT32(1) + /** * Physical memory segment descriptor. */ -#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0) -/**< Prevent this segment from being freed back to the OS. */ struct rte_memseg { rte_iova_t iova; /**< Start IO address. */ RTE_STD_C11 -- 2.25.1