Adjust PCI infrastructure to reserve device memory through the
new device memory API. Any hotplug event will reserve memory, and any
hot-unplug event will release that memory back to the system.

This allows for more reliable PCI mappings in secondary processes,
and will be crucial to support multiprocess hotplug.

Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
---
 drivers/bus/pci/linux/pci_init.h |  1 -
 drivers/bus/pci/linux/pci_uio.c  | 11 +----------
 drivers/bus/pci/linux/pci_vfio.c | 27 ++++++++++++---------------
 lib/librte_pci/Makefile          |  1 +
 lib/librte_pci/rte_pci.c         | 20 +++++++++++++++++++-
 5 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index c2e603a37..bc9279c66 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -14,7 +14,6 @@
 /*
  * Helper function to map PCI resources right after hugepages in virtual memory
  */
-extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
 /* parse one line of the "resource" sysfs file (note that the 'line'
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4bb0..dbf108b6f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -26,8 +26,6 @@
 #include "eal_filesystem.h"
 #include "pci_init.h"
 
-void *pci_map_addr = NULL;
-
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 
 int
@@ -316,19 +314,12 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
                goto error;
        }
 
-       /* try mapping somewhere close to the end of hugepages */
-       if (pci_map_addr == NULL)
-               pci_map_addr = pci_find_max_end_va();
-
-       mapaddr = pci_map_resource(pci_map_addr, fd, 0,
+       mapaddr = pci_map_resource(NULL, fd, 0,
                        (size_t)dev->mem_resource[res_idx].len, 0);
        close(fd);
        if (mapaddr == MAP_FAILED)
                goto error;
 
-       pci_map_addr = RTE_PTR_ADD(mapaddr,
-                       (size_t)dev->mem_resource[res_idx].len);
-
        maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
        maps[map_idx].size = dev->mem_resource[res_idx].len;
        maps[map_idx].addr = mapaddr;
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index aeeaa9ed8..f390ea37a 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -324,7 +324,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
 
 static int
 pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
-               int bar_index, int additional_flags)
+               int bar_index)
 {
        struct memreg {
                unsigned long offset, size;
@@ -371,9 +371,14 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
                memreg[0].size = bar->size;
        }
 
-       /* reserve the address using an inaccessible mapping */
-       bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
-                       MAP_ANONYMOUS | additional_flags, -1, 0);
+       if (bar->addr == NULL) {
+               bar_addr = rte_mem_dev_memory_alloc(bar->size, 0);
+               if (bar_addr == NULL) {
+                       RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+                               __func__);
+                       return -1;
+               }
+       }
        if (bar_addr != MAP_FAILED) {
                void *map_addr = NULL;
                if (memreg[0].size) {
@@ -469,7 +474,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 
        for (i = 0; i < (int) vfio_res->nb_maps; i++) {
                struct vfio_region_info reg = { .argsz = sizeof(reg) };
-               void *bar_addr;
 
                reg.index = i;
 
@@ -494,19 +498,12 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
                if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
                        continue;
 
-               /* try mapping somewhere close to the end of hugepages */
-               if (pci_map_addr == NULL)
-                       pci_map_addr = pci_find_max_end_va();
-
-               bar_addr = pci_map_addr;
-               pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
-
-               maps[i].addr = bar_addr;
+               maps[i].addr = NULL;
                maps[i].offset = reg.offset;
                maps[i].size = reg.size;
                maps[i].path = NULL; /* vfio doesn't have per-resource paths */
 
-               ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+               ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
                if (ret < 0) {
                        RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
                                        pci_addr, i, strerror(errno));
@@ -574,7 +571,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
        maps = vfio_res->maps;
 
        for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-               ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
+               ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
                if (ret < 0) {
                        RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
                                        pci_addr, i, strerror(errno));
diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile
index 94a632670..f996fe33c 100644
--- a/lib/librte_pci/Makefile
+++ b/lib/librte_pci/Makefile
@@ -8,6 +8,7 @@ LIB = librte_pci.a
 
 CFLAGS := -I$(SRCDIR) $(CFLAGS)
 CFLAGS += $(WERROR_FLAGS) -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal
 
 EXPORT_MAP := rte_pci_version.map
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 530738dbd..c425a624e 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -151,6 +151,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 {
        void *mapaddr;
 
+       if (requested_addr == NULL) {
+               requested_addr = rte_mem_dev_memory_alloc(size, 0);
+               if (requested_addr == NULL) {
+                       RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+                               __func__);
+                       return MAP_FAILED;
+               }
+       }
+       additional_flags |= MAP_FIXED;
+
        /* Map the PCI memory resource of device */
        mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | additional_flags, fd, offset);
@@ -170,15 +180,23 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 void
 pci_unmap_resource(void *requested_addr, size_t size)
 {
+       void *mapped;
        if (requested_addr == NULL)
                return;
 
+       mapped = mmap(requested_addr, size, PROT_READ,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
        /* Unmap the PCI memory resource of device */
-       if (munmap(requested_addr, size)) {
+       if (mapped == MAP_FAILED) {
                RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n",
                        __func__, requested_addr, size,
                        strerror(errno));
        } else
                RTE_LOG(DEBUG, EAL, "  PCI memory unmapped at %p\n",
                                requested_addr);
+       if (rte_mem_dev_memory_free(requested_addr, size))
+               RTE_LOG(ERR, EAL, "%s(): cannot mark %p-%p as free\n",
+                       __func__, requested_addr,
+                       RTE_PTR_ADD(requested_addr, size));
 }
-- 
2.17.0

Reply via email to