Also, rewrite VFIO to rely on memory event callbacks instead of
manually registering memory with VFIO. The callback is only registered
if VFIO is enabled.
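
For context, a minimal sketch of what an application-side consumer of
the same notifications could look like. The callback signature and the
rte_mem_event_register_callback() call mirror the VFIO usage in this
patch; the <rte_memory.h> header location and the app_* names are
illustrative assumptions, not part of this patch:

    #include <stdio.h>
    #include <rte_memory.h> /* assumed home of the mem event API */

    /* same signature as vfio_mem_event_callback() below */
    static void
    app_mem_event_cb(enum rte_mem_event type, const void *addr, size_t len)
    {
            printf("mem event: %s at %p, len %zu\n",
                            type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free",
                            addr, len);
    }

    static void
    app_init_mem_events(void)
    {
            /* each callback is registered under a unique name, just as
             * rte_vfio_enable() registers "vfio_mem_event_clb" below
             */
            rte_mem_event_register_callback("app_mem_event_clb",
                            app_mem_event_cb);
    }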

Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
---
 lib/librte_eal/common/malloc_heap.c        | 21 +++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 37 +++++++++++++++++++++---------
 lib/librte_eal/linuxapp/eal/eal_vfio.c     | 35 ++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 9109555..9d055c8 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -223,6 +223,7 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
        void *map_addr;
        size_t map_len;
        int n_pages;
+       bool callback_triggered = false;
 
        map_len = RTE_ALIGN_CEIL(align + elt_size +
                        MALLOC_ELEM_TRAILER_LEN, pg_sz);
@@ -242,14 +243,25 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 
        map_addr = ms[0]->addr;
 
+       /* notify user about changes in memory map */
+       eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, map_addr, map_len);
+
        /* notify other processes that this has happened */
        if (request_sync()) {
                /* we couldn't ensure all processes have mapped memory,
                 * so free it back and notify everyone that it's been
                 * freed back.
+                *
+                * technically, we could've avoided adding memory addresses to
+                * the map, but that would've led to inconsistent behavior
+                * between primary and secondary processes, as those get
+                * callbacks during sync. therefore, force the primary
+                * process to do alloc-and-rollback syncs as well.
                 */
+               callback_triggered = true;
                goto free_elem;
        }
+
        heap->total_size += map_len;
 
        RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
@@ -260,6 +272,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
        return 0;
 
 free_elem:
+       if (callback_triggered)
+               eal_memalloc_notify(RTE_MEM_EVENT_FREE, map_addr, map_len);
+
        rollback_expand_heap(ms, n_pages, elem, map_addr, map_len);
 
        request_sync();
@@ -615,6 +630,10 @@ malloc_heap_free(struct malloc_elem *elem)
        heap->total_size -= n_pages * msl->hugepage_sz;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               /* notify user about changes in memory map */
+               eal_memalloc_notify(RTE_MEM_EVENT_FREE,
+                               aligned_start, aligned_len);
+
                /* don't care if any of this fails */
                malloc_heap_free_pages(aligned_start, aligned_len);
 
@@ -637,6 +656,8 @@ malloc_heap_free(struct malloc_elem *elem)
                 * already removed from the heap, so it is, for all intents and
                 * purposes, hidden from the rest of DPDK even if some other
                 * process (including this one) may have these pages mapped.
+                *
+                * notifications about deallocated memory happen during sync.
                 */
                request_to_primary(&req);
        }
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 227d703..1008fae 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -34,7 +34,6 @@
 #include <rte_eal.h>
 #include <rte_memory.h>
 #include <rte_spinlock.h>
-#include <rte_vfio.h>
 
 #include "eal_filesystem.h"
 #include "eal_internal_cfg.h"
@@ -480,10 +479,6 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
        ms->iova = iova;
        ms->socket_id = socket_id;
 
-       /* map the segment so that VFIO has access to it */
-       if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-                       rte_vfio_dma_map(ms->addr_64, iova, size))
-               RTE_LOG(DEBUG, EAL, "Cannot register segment with VFIO\n");
        return 0;
 
 mapped:
@@ -515,12 +510,6 @@ free_page(struct rte_memseg *ms, struct hugepage_info *hi,
        char path[PATH_MAX];
        int fd, ret;
 
-       /* unmap the segment from VFIO */
-       if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-                       rte_vfio_dma_unmap(ms->addr_64, ms->iova, ms->len)) {
-               RTE_LOG(DEBUG, EAL, "Cannot unregister segment with VFIO\n");
-       }
-
        if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
                                MAP_FAILED) {
@@ -808,6 +797,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
 
        diff_len = RTE_MIN(chunk_len, diff_len);
 
+       /* if we are freeing memory, notify the application */
+       if (!used) {
+               struct rte_memseg *ms;
+               void *start_va;
+               size_t len;
+
+               ms = rte_fbarray_get(l_arr, start);
+               start_va = ms->addr;
+               len = ms->len * diff_len;
+
+               eal_memalloc_notify(RTE_MEM_EVENT_FREE, start_va, len);
+       }
+
        for (i = 0; i < diff_len; i++) {
                struct rte_memseg *p_ms, *l_ms;
                int seg_idx = start + i;
@@ -834,6 +836,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
                }
        }
 
+       /* if we just allocated memory, notify the application */
+       if (used) {
+               struct rte_memseg *ms;
+               void *start_va;
+               size_t len;
+
+               ms = rte_fbarray_get(l_arr, start);
+               start_va = ms->addr;
+               len = ms->len * diff_len;
+
+               eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, start_va, len);
+       }
+
        /* calculate how much we can advance until next chunk */
        diff_len = used ?
                        rte_fbarray_find_contig_used(l_arr, start) :
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 8fe8984..d3c3b70 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -214,6 +214,37 @@ vfio_group_device_count(int vfio_group_fd)
        return vfio_cfg.vfio_groups[i].devices;
 }
 
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+       struct rte_memseg_list *msl;
+       struct rte_memseg *ms;
+       size_t cur_len = 0;
+       uint64_t pgsz;
+
+       msl = rte_mem_virt2memseg_list(addr);
+       pgsz = msl->hugepage_sz;
+
+       while (cur_len < len) {
+               const void *va = RTE_PTR_ADD(addr, cur_len);
+               uint64_t vfio_va, iova;
+
+               ms = rte_mem_virt2memseg(va, msl);
+               vfio_va = (uint64_t) (uintptr_t) va;
+               iova = ms->iova;
+
+               /* this never gets called in legacy mode, so we can be sure that
+                * each segment is a single page.
+                */
+               if (type == RTE_MEM_EVENT_ALLOC)
+                       rte_vfio_dma_map(vfio_va, iova, pgsz);
+               else
+                       rte_vfio_dma_unmap(vfio_va, iova, pgsz);
+
+               cur_len += pgsz;
+       }
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
@@ -507,6 +538,10 @@ rte_vfio_enable(const char *modname)
        if (vfio_cfg.vfio_container_fd != -1) {
                RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
                vfio_cfg.vfio_enabled = 1;
+
+               /* register callback for mem events */
+               rte_mem_event_register_callback("vfio_mem_event_clb",
+                               vfio_mem_event_callback);
        } else {
                RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
        }
-- 
2.7.4
