No major changes: add physical-contiguity checks in a few key places,
and pass a new "contig" parameter down through the memzone and malloc
allocation paths.

Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
---
 lib/librte_eal/common/eal_common_memzone.c |  20 +++---
 lib/librte_eal/common/malloc_elem.c        | 101 ++++++++++++++++++++++-------
 lib/librte_eal/common/malloc_elem.h        |   4 +-
 lib/librte_eal/common/malloc_heap.c        |  57 ++++++++++------
 lib/librte_eal/common/malloc_heap.h        |   4 +-
 lib/librte_eal/common/rte_malloc.c         |   6 +-
 6 files changed, 134 insertions(+), 58 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 718dee8..75c7dd9 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -98,7 +98,8 @@ find_heap_max_free_elem(int *s, unsigned align)
 
 static const struct rte_memzone *
 memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
-               int socket_id, unsigned flags, unsigned align, unsigned bound)
+               int socket_id, unsigned int flags, unsigned int align,
+               unsigned int bound, bool contig)
 {
        struct rte_memzone *mz;
        struct rte_mem_config *mcfg;
@@ -182,7 +183,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 
        /* allocate memory on heap */
        void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
-                       align, bound);
+                       align, bound, contig);
 
        if (mz_addr == NULL) {
                rte_errno = ENOMEM;
@@ -215,9 +216,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 }
 
 static const struct rte_memzone *
-rte_memzone_reserve_thread_safe(const char *name, size_t len,
-                               int socket_id, unsigned flags, unsigned align,
-                               unsigned bound)
+rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
+               unsigned int flags, unsigned int align, unsigned int bound,
+               bool contig)
 {
        struct rte_mem_config *mcfg;
        const struct rte_memzone *mz = NULL;
@@ -228,7 +229,7 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len,
        rte_rwlock_write_lock(&mcfg->mlock);
 
        mz = memzone_reserve_aligned_thread_unsafe(
-               name, len, socket_id, flags, align, bound);
+               name, len, socket_id, flags, align, bound, contig);
 
        rte_rwlock_write_unlock(&mcfg->mlock);
 
@@ -245,7 +246,7 @@ rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
                            unsigned flags, unsigned align, unsigned bound)
 {
        return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
-                                              align, bound);
+                                              align, bound, false);
 }
 
 /*
@@ -257,7 +258,7 @@ rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
                            unsigned flags, unsigned align)
 {
        return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
-                                              align, 0);
+                                              align, 0, false);
 }
 
 /*
@@ -269,7 +270,8 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id,
                    unsigned flags)
 {
        return rte_memzone_reserve_thread_safe(name, len, socket_id,
-                                              flags, RTE_CACHE_LINE_SIZE, 0);
+                                              flags, RTE_CACHE_LINE_SIZE, 0,
+                                              false);
 }
 
 int
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index eabad66..d2dba35 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -17,6 +17,7 @@
 #include <rte_common.h>
 #include <rte_spinlock.h>
 
+#include "eal_memalloc.h"
 #include "malloc_elem.h"
 #include "malloc_heap.h"
 
@@ -94,33 +95,88 @@ malloc_elem_insert(struct malloc_elem *elem)
 }
 
 /*
+ * Attempt to find enough physically contiguous memory in this block to store
+ * our data. Assume that element has at least enough space to fit in the data,
+ * so we just check the page addresses.
+ */
+static bool
+elem_check_phys_contig(struct rte_memseg_list *msl, void *start, size_t size)
+{
+       uint64_t page_sz;
+       void *aligned_start, *end, *aligned_end;
+       size_t aligned_len;
+
+       /* figure out how many pages we need to fit in current data */
+       page_sz = msl->hugepage_sz;
+       aligned_start = RTE_PTR_ALIGN_FLOOR(start, page_sz);
+       end = RTE_PTR_ADD(start, size);
+       aligned_end = RTE_PTR_ALIGN_CEIL(end, page_sz);
+
+       aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+
+       return eal_memalloc_is_contig(msl, aligned_start, aligned_len);
+}
+
+/*
  * calculate the starting point of where data of the requested size
  * and alignment would fit in the current element. If the data doesn't
  * fit, return NULL.
  */
 static void *
 elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
-               size_t bound)
+               size_t bound, bool contig)
 {
-       const size_t bmask = ~(bound - 1);
-       uintptr_t end_pt = (uintptr_t)elem +
-                       elem->size - MALLOC_ELEM_TRAILER_LEN;
-       uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
-       uintptr_t new_elem_start;
-
-       /* check boundary */
-       if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
-               end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
-               new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
-               end_pt = new_data_start + size;
-               if (((end_pt - 1) & bmask) != (new_data_start & bmask))
-                       return NULL;
-       }
+       size_t elem_size = elem->size;
 
-       new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+       /*
+        * we're allocating from the end, so shrink the candidate element size
+        * by the alignment each time a placement is rejected
+        */
+       while (elem_size >= size) {
+               const size_t bmask = ~(bound - 1);
+               uintptr_t end_pt = (uintptr_t)elem +
+                               elem_size - MALLOC_ELEM_TRAILER_LEN;
+               uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+                               align);
+               uintptr_t new_elem_start;
+
+               /* check boundary */
+               if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+                       end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+                       new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+                                       align);
+                       end_pt = new_data_start + size;
+
+                       if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+                               return NULL;
+               }
 
-       /* if the new start point is before the exist start, it won't fit */
-       return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+               new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+               /* if the new start point is before the exist start,
+                * it won't fit
+                */
+               if (new_elem_start < (uintptr_t)elem)
+                       return NULL;
+
+               if (contig) {
+                       size_t new_data_size = end_pt - new_data_start;
+
+                       /*
+                        * if physical contiguousness was requested and we
+                        * couldn't fit all data into one physically contiguous
+                        * block, try again with lower addresses.
+                        */
+                       if (!elem_check_phys_contig(elem->msl,
+                                       (void *) new_data_start,
+                                       new_data_size)) {
+                               elem_size -= align;
+                               continue;
+                       }
+               }
+               return (void *) new_elem_start;
+       }
+       return NULL;
 }
 
 /*
@@ -129,9 +185,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
  */
 int
 malloc_elem_can_hold(struct malloc_elem *elem, size_t size,    unsigned align,
-               size_t bound)
+               size_t bound, bool contig)
 {
-       return elem_start_pt(elem, size, align, bound) != NULL;
+       return elem_start_pt(elem, size, align, bound, contig) != NULL;
 }
 
 /*
@@ -259,9 +315,10 @@ malloc_elem_free_list_remove(struct malloc_elem *elem)
  */
 struct malloc_elem *
 malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
-               size_t bound)
+               size_t bound, bool contig)
 {
-       struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+       struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
+                       contig);
        const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
        const size_t trailer_size = elem->size - old_elem_size - size -
                MALLOC_ELEM_OVERHEAD;
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 6d979d2..798472e 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -123,7 +123,7 @@ malloc_elem_insert(struct malloc_elem *elem);
  */
 int
 malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
-               unsigned align, size_t bound);
+               unsigned int align, size_t bound, bool contig);
 
 /*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
@@ -131,7 +131,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
  */
 struct malloc_elem *
 malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-               unsigned align, size_t bound);
+               unsigned int align, size_t bound, bool contig);
 
 /*
  * free a malloc_elem block by adding it to the free list. If the
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 87dc9ad..984e027 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -94,7 +94,7 @@ malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
  */
 static struct malloc_elem *
 find_suitable_element(struct malloc_heap *heap, size_t size,
-               unsigned flags, size_t align, size_t bound)
+               unsigned int flags, size_t align, size_t bound, bool contig)
 {
        size_t idx;
        struct malloc_elem *elem, *alt_elem = NULL;
@@ -103,7 +103,8 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
                        idx < RTE_HEAP_NUM_FREELISTS; idx++) {
                for (elem = LIST_FIRST(&heap->free_head[idx]);
                                !!elem; elem = LIST_NEXT(elem, free_list)) {
-                       if (malloc_elem_can_hold(elem, size, align, bound)) {
+                       if (malloc_elem_can_hold(elem, size, align, bound,
+                                       contig)) {
                                if (check_hugepage_sz(flags,
                                                elem->msl->hugepage_sz))
                                        return elem;
@@ -127,16 +128,16 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
  */
 static void *
 heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
-               unsigned int flags, size_t align, size_t bound)
+               unsigned int flags, size_t align, size_t bound, bool contig)
 {
        struct malloc_elem *elem;
 
        size = RTE_CACHE_LINE_ROUNDUP(size);
        align = RTE_CACHE_LINE_ROUNDUP(align);
 
-       elem = find_suitable_element(heap, size, flags, align, bound);
+       elem = find_suitable_element(heap, size, flags, align, bound, contig);
        if (elem != NULL) {
-               elem = malloc_elem_alloc(elem, size, align, bound);
+               elem = malloc_elem_alloc(elem, size, align, bound, contig);
 
                /* increase heap's count of allocated elements */
                heap->alloc_count++;
@@ -147,14 +148,15 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
 
 static int
 try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
-               int socket, unsigned int flags, size_t align, size_t bound)
+               int socket, unsigned int flags, size_t align, size_t bound,
+               bool contig)
 {
+       size_t map_len, data_start_offset;
        struct rte_memseg_list *msl;
        struct rte_memseg **ms;
        struct malloc_elem *elem;
-       size_t map_len;
        int i, n_pages, allocd_pages;
-       void *ret, *map_addr;
+       void *ret, *map_addr, *data_start;
 
        align = RTE_MAX(align, MALLOC_ELEM_HEADER_LEN);
        map_len = RTE_ALIGN_CEIL(align + elt_size + MALLOC_ELEM_TRAILER_LEN,
@@ -175,11 +177,22 @@ try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
        map_addr = ms[0]->addr;
        msl = rte_mem_virt2memseg_list(map_addr);
 
+       /* check if we wanted contiguous memory but didn't get it */
+       data_start_offset = RTE_ALIGN(MALLOC_ELEM_HEADER_LEN, align);
+       data_start = RTE_PTR_ADD(ms[0]->addr, data_start_offset);
+       if (contig && !eal_memalloc_is_contig(msl, data_start,
+                       n_pages * msl->hugepage_sz)) {
+               RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+                               __func__);
+               goto free_pages;
+       }
+
        /* add newly minted memsegs to malloc heap */
        elem = malloc_heap_add_memory(heap, msl, map_addr, map_len);
 
        /* try once more, as now we have allocated new memory */
-       ret = find_suitable_element(heap, elt_size, flags, align, bound);
+       ret = find_suitable_element(heap, elt_size, flags, align, bound,
+                       contig);
 
        if (ret == NULL)
                goto free_elem;
@@ -196,6 +209,7 @@ try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
        malloc_elem_hide_region(elem, map_addr, map_len);
        heap->total_size -= map_len;
 
+free_pages:
        for (i = 0; i < n_pages; i++)
                eal_memalloc_free_page(ms[i]);
 free_ms:
@@ -223,7 +237,7 @@ compare_pagesz(const void *a, const void *b)
 
 static int
 alloc_mem_on_socket(size_t size, int socket, unsigned int flags, size_t align,
-               size_t bound)
+               size_t bound, bool contig)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
@@ -304,14 +318,14 @@ alloc_mem_on_socket(size_t size, int socket, unsigned int flags, size_t align,
                 * sizes first, before resorting to best effort allocation.
                 */
                if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
-                               align, bound))
+                               align, bound, contig))
                        return 0;
        }
        if (n_other_pg_sz == 0)
                return -1;
 
        /* now, check if we can reserve anything with size hint */
-       ret = find_suitable_element(heap, size, flags, align, bound);
+       ret = find_suitable_element(heap, size, flags, align, bound, contig);
        if (ret != NULL)
                return 0;
 
@@ -323,7 +337,7 @@ alloc_mem_on_socket(size_t size, int socket, unsigned int flags, size_t align,
                uint64_t pg_sz = other_pg_sz[i];
 
                if (!try_expand_heap(heap, pg_sz, size, socket, flags,
-                               align, bound))
+                               align, bound, contig))
                        return 0;
        }
        return -1;
@@ -332,7 +346,7 @@ alloc_mem_on_socket(size_t size, int socket, unsigned int flags, size_t align,
 /* this will try lower page sizes first */
 static void *
 heap_alloc_on_socket(const char *type, size_t size, int socket,
-               unsigned int flags, size_t align, size_t bound)
+               unsigned int flags, size_t align, size_t bound, bool contig)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
@@ -345,7 +359,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
 
        /* for legacy mode, try once and with all flags */
        if (internal_config.legacy_mem) {
-               ret = heap_alloc(heap, type, size, flags, align, bound);
+               ret = heap_alloc(heap, type, size, flags, align, bound, contig);
                goto alloc_unlock;
        }
 
@@ -354,12 +368,12 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
         * we may still be able to allocate memory from appropriate page sizes,
         * we just need to request more memory first.
         */
-       ret = heap_alloc(heap, type, size, size_flags, align, bound);
+       ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
        if (ret != NULL)
                goto alloc_unlock;
 
-       if (!alloc_mem_on_socket(size, socket, flags, align, bound)) {
-               ret = heap_alloc(heap, type, size, flags, align, bound);
+       if (!alloc_mem_on_socket(size, socket, flags, align, bound, contig)) {
+               ret = heap_alloc(heap, type, size, flags, align, bound, contig);
 
                /* this should have succeeded */
                if (ret == NULL)
@@ -372,7 +386,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
 
 void *
 malloc_heap_alloc(const char *type, size_t size, int socket_arg,
-               unsigned int flags, size_t align, size_t bound)
+               unsigned int flags, size_t align, size_t bound, bool contig)
 {
        int socket, i;
        void *ret;
@@ -393,7 +407,8 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
        if (socket >= RTE_MAX_NUMA_NODES)
                return NULL;
 
-       ret = heap_alloc_on_socket(type, size, socket, flags, align, bound);
+       ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
+                       contig);
        if (ret != NULL || socket_arg != SOCKET_ID_ANY)
                return ret;
 
@@ -402,7 +417,7 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
                if (i == socket)
                        continue;
                ret = heap_alloc_on_socket(type, size, i, flags,
-                               align, bound);
+                               align, bound, contig);
                if (ret != NULL)
                        return ret;
        }
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index 292d578..03b8014 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -5,6 +5,8 @@
 #ifndef MALLOC_HEAP_H_
 #define MALLOC_HEAP_H_
 
+#include <stdbool.h>
+
 #include <rte_malloc.h>
 #include <rte_malloc_heap.h>
 
@@ -25,7 +27,7 @@ malloc_get_numa_socket(void)
 
 void *
 malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
-               size_t align, size_t bound);
+               size_t align, size_t bound, bool contig);
 
 int
 malloc_heap_free(struct malloc_elem *elem);
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index b0fe11c..5cd92d1 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -37,7 +37,8 @@ void rte_free(void *addr)
  * Allocate memory on specified heap.
  */
 void *
-rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+rte_malloc_socket(const char *type, size_t size, unsigned int align,
+               int socket_arg)
 {
        /* return NULL if size is 0 or alignment is not power-of-2 */
        if (size == 0 || (align && !rte_is_power_of_2(align)))
@@ -50,8 +51,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
        if (socket_arg >= RTE_MAX_NUMA_NODES)
                return NULL;
 
-       return malloc_heap_alloc(type, size, socket_arg, 0,
-                       align == 0 ? 1 : align, 0);
+       return malloc_heap_alloc(type, size, socket_arg, 0, align, 0, false);
 }
 
 /*
-- 
2.7.4

Reply via email to