On Thu, Nov 13, 2014 at 11:34:22AM +0000, Burakov, Anatoly wrote: > Hi Thomas and all > > Are there any objections to this patch? If there are no objections to it, > could someone perhaps ack it? >
No objections, just a quick request for clarification below. /Bruce > Thanks, > Anatoly > > -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Anatoly Burakov > Sent: Tuesday, November 11, 2014 10:09 AM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages > > Multi-process DPDK application must mmap hugepages and PCI resources into the > same virtual address space. By default the virtual addresses are chosen by > the primary process automatically when calling the mmap. > But sometimes the chosen virtual addresses aren't usable in secondary process > - for example, secondary process is linked with more libraries than primary > process, and the library occupies the same address space that the primary > process has requested for PCI mappings. > > This patch makes EAL try and map PCI BARs right after the hugepages (instead > of location chosen by mmap) in virtual memory, so that PCI BARs have less > chance of ending up in random places in virtual memory. 
> > Signed-off-by: Liang Xu <liang.xu at cinfotech.cn> > Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com> > --- > lib/librte_eal/linuxapp/eal/eal_pci.c | 30 > ++++++++++++++++------ > lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 13 ++++++++-- > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++++++++++--- > lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 6 +++++ > 4 files changed, 55 insertions(+), 13 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c > b/lib/librte_eal/linuxapp/eal/eal_pci.c > index 5fe3961..79fbbb8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c > @@ -97,6 +97,25 @@ error: > return -1; > } > > +void * > +pci_find_max_end_va(void) > +{ > + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); > + const struct rte_memseg *last = seg; > + unsigned i = 0; > + > + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { > + if (seg->addr == NULL) > + break; > + > + if (seg->addr > last->addr) > + last = seg; > + > + } > + return RTE_PTR_ADD(last->addr, last->len); } > + > + > /* map a particular resource from a file */ void * pci_map_resource(void > *requested_addr, int fd, off_t offset, size_t size) @@ -106,21 +125,16 @@ > pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) > /* Map the PCI memory resource of device */ > mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, > MAP_SHARED, fd, offset); > - if (mapaddr == MAP_FAILED || > - (requested_addr != NULL && mapaddr != requested_addr)) { Why has this check been removed from here? I assume it is replaced by a new check in secondary processes that I see added below, but perhaps you could explain the reason for the change? 
> + if (mapaddr == MAP_FAILED) { > RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s > (%p)\n", > __func__, fd, requested_addr, > (unsigned long)size, (unsigned long)offset, > strerror(errno), mapaddr); > - goto fail; > + } else { > + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > } > > - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > - > return mapaddr; > - > -fail: > - return NULL; > } > > /* parse the "resource" sysfs file */ > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > index 7e62266..e53f06b 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > @@ -35,6 +35,7 @@ > #include <fcntl.h> > #include <dirent.h> > #include <sys/stat.h> > +#include <sys/mman.h> > > #include <rte_log.h> > #include <rte_pci.h> > @@ -48,6 +49,8 @@ > > static int pci_parse_sysfs_value(const char *filename, uint64_t *val); > > +void *pci_map_addr = NULL; > + > > #define OFF_MAX ((uint64_t)(off_t)-1) > static int > @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev) > if (maps[j].addr != NULL) > fail = 1; > else { > - mapaddr = pci_map_resource(NULL, fd, > (off_t)offset, > + /* try mapping somewhere close to the end of > hugepages */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + mapaddr = pci_map_resource(pci_map_addr, fd, > (off_t)offset, > (size_t)maps[j].size); > - if (mapaddr == NULL) > + if (mapaddr == MAP_FAILED) > fail = 1; > + > + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) > maps[j].size); > } > > if (fail) { > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index c776ddc..c1246e8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -37,6 +37,7 @@ > #include <sys/eventfd.h> > #include <sys/socket.h> > #include <sys/ioctl.h> > +#include <sys/mman.h> > > 
#include <rte_log.h> > #include <rte_pci.h> > @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > if (i == msix_bar) > continue; > > - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, > reg.offset, > - reg.size); > + if (internal_config.process_type == RTE_PROC_PRIMARY) { > + /* try mapping somewhere close to the end of hugepages > */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, > reg.offset, > + reg.size); > + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); > + } else { > + bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, > reg.offset, > + reg.size); > + } > > - if (bar_addr == NULL) { > + if (bar_addr == MAP_FAILED || > + (internal_config.process_type == > RTE_PROC_SECONDARY && > + bar_addr != maps[i].addr)) { > RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", > pci_addr, i, > strerror(errno)); > close(vfio_dev_fd); > diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > index d758bee..1070eb8 100644 > --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > @@ -59,6 +59,12 @@ struct mapped_pci_resource { > TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); extern struct > mapped_pci_res_list *pci_res_list; > > +/* > + * Helper function to map PCI resources right after hugepages in > +virtual memory */ extern void *pci_map_addr; void > +*pci_find_max_end_va(void); > + > void *pci_map_resource(void *requested_addr, int fd, off_t offset, > size_t size); > > -- > 1.8.1.4 >