On Wed, Feb 13, 2019 at 11:10:25AM +0200, Shahaf Shuler wrote:
> The implementation reuses the external memory registration work done by
> commit [1].
>
> Note about representors:
>
> The current representor design will not work with these map and unmap
> functions, because with representors we have multiple IB devices sharing
> the same PCI function, so mapping will happen on only one of the
> representors and not all of them.
>
> While it is possible to implement such support, the IB representor
> design is going to be changed during DPDK 19.05. The new design will
> have a single IB device for all representors, hence sharing a single
> memory region between all representors will be possible.
>
> [1] commit 7e43a32ee060
>     ("net/mlx5: support externally allocated static memory")
>
> Signed-off-by: Shahaf Shuler <shah...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c      |   2 +
>  drivers/net/mlx5/mlx5_mr.c   | 146 ++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_rxtx.h |   5 ++
>  3 files changed, 153 insertions(+)
>
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index a913a5955f..7c91701713 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -1626,6 +1626,8 @@ static struct rte_pci_driver mlx5_driver = {
>  	.id_table = mlx5_pci_id_map,
>  	.probe = mlx5_pci_probe,
>  	.remove = mlx5_pci_remove,
> +	.map = mlx5_dma_map,
> +	.unmap = mlx5_dma_unmap,
>  	.drv_flags = (RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
>  		      RTE_PCI_DRV_PROBE_AGAIN),
>  };
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index 32be6a5445..7059181959 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -14,6 +14,7 @@
>  #include <rte_mempool.h>
>  #include <rte_malloc.h>
>  #include <rte_rwlock.h>
> +#include <rte_bus_pci.h>
>
>  #include "mlx5.h"
>  #include "mlx5_mr.h"
> @@ -1215,6 +1216,151 @@ mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
>  }
>
>  /**
> + * Find the first ethdev that matches the PCI device.
> + * Multiple ethdevs per PCI device exist only with representors.
> + * In that case it is enough to get only one of the ports, as they all
> + * share the same ibv context.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + *
> + * @return
> + *   Pointer to the ethdev if found, NULL otherwise.
> + */
> +static struct rte_eth_dev *
> +pci_dev_to_eth_dev(struct rte_pci_device *pdev)
> +{
> +	struct rte_eth_dev *dev;
> +	const char *drv_name;
> +	uint16_t port_id = 0;
> +
> +	/*
> +	 * We really need to iterate all eth devices regardless of
> +	 * their owner.
> +	 */
> +	while (port_id < RTE_MAX_ETHPORTS) {
> +		port_id = rte_eth_find_next(port_id);
> +		if (port_id >= RTE_MAX_ETHPORTS)
> +			break;
> +		dev = &rte_eth_devices[port_id];
> +		drv_name = dev->device->driver->name;
> +		if (!strncmp(drv_name, MLX5_DRIVER_NAME,
> +			     sizeof(MLX5_DRIVER_NAME) + 1) &&
> +		    pdev == RTE_DEV_TO_PCI(dev->device)) {
> +			/* found the PCI device. */
> +			return dev;
> +		}
> +	}
> +	return NULL;
> +}

Might I interest you in the new API?

	{
		struct rte_dev_iterator it;
		struct rte_device *dev;

		RTE_DEV_FOREACH(dev, "class=eth", &it)
			if (dev == &pdev->device)
				return it.class_device;
		return NULL;
	}
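
Spelled out as a complete (untested) drop-in for the helper above:
class_device is a void pointer, so the return needs a cast, and my
assumption is that for the "class=eth" filter it points at the matching
rte_eth_dev. The driver-name check also becomes unnecessary, since the
rte_device pointers are compared directly:

	#include <rte_bus_pci.h>
	#include <rte_dev.h>
	#include <rte_ethdev.h>

	static struct rte_eth_dev *
	pci_dev_to_eth_dev(struct rte_pci_device *pdev)
	{
		struct rte_dev_iterator it;
		struct rte_device *dev;

		/*
		 * Walk ethdev class devices only, matching on the
		 * underlying rte_device, so representors sharing the
		 * PCI function resolve to the first of their ports.
		 */
		RTE_DEV_FOREACH(dev, "class=eth", &it)
			if (dev == &pdev->device)
				return (struct rte_eth_dev *)it.class_device;
		return NULL;
	}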
> +
> +/**
> + * DPDK callback to DMA map external memory to a PCI device.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + * @param addr
> + *   Starting virtual address of memory to be mapped.
> + * @param iova
> + *   Starting IOVA address of memory to be mapped.
> + * @param len
> + *   Length of memory segment being mapped.
> + *
> + * @return
> + *   0 on success, negative value on error.
> + */
> +int
> +mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
> +	     uint64_t iova __rte_unused, size_t len)
> +{
> +	struct rte_eth_dev *dev;
> +	struct mlx5_mr *mr;
> +	struct priv *priv;
> +
> +	dev = pci_dev_to_eth_dev(pdev);
> +	if (!dev) {
> +		DRV_LOG(WARNING, "unable to find matching ethdev "
> +				 "to PCI device %p", (void *)pdev);
> +		return -1;
> +	}
> +	priv = dev->data->dev_private;
> +	mr = mlx5_create_mr_ext(dev, (uintptr_t)addr, len, SOCKET_ID_ANY);
> +	if (!mr) {
> +		DRV_LOG(WARNING,
> +			"port %u unable to dma map", dev->data->port_id);
> +		return -1;
> +	}
> +	rte_rwlock_write_lock(&priv->mr.rwlock);
> +	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
> +	/* Insert to the global cache table. */
> +	mr_insert_dev_cache(dev, mr);
> +	rte_rwlock_write_unlock(&priv->mr.rwlock);
> +	return 0;
> +}
> +
> +/**
> + * DPDK callback to DMA unmap external memory from a PCI device.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + * @param addr
> + *   Starting virtual address of memory to be unmapped.
> + * @param iova
> + *   Starting IOVA address of memory to be unmapped.
> + * @param len
> + *   Length of memory segment being unmapped.
> + *
> + * @return
> + *   0 on success, negative value on error.
> + */
> +int
> +mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr,
> +	       uint64_t iova __rte_unused, size_t len __rte_unused)
> +{
> +	struct rte_eth_dev *dev;
> +	struct priv *priv;
> +	struct mlx5_mr *mr;
> +	struct mlx5_mr_cache entry;
> +
> +	dev = pci_dev_to_eth_dev(pdev);
> +	if (!dev) {
> +		DRV_LOG(WARNING, "unable to find matching ethdev "
> +				 "to PCI device %p", (void *)pdev);
> +		return -1;
> +	}
> +	priv = dev->data->dev_private;
> +	rte_rwlock_read_lock(&priv->mr.rwlock);
> +	mr = mr_lookup_dev_list(dev, &entry, (uintptr_t)addr);
> +	if (!mr) {
> +		DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered "
> +				 "to PCI device %p", (uintptr_t)addr,
> +				 (void *)pdev);
> +		rte_rwlock_read_unlock(&priv->mr.rwlock);
> +		return -1;
> +	}
> +	LIST_REMOVE(mr, mr);
> +	LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
> +	DEBUG("port %u remove MR(%p) from list", dev->data->port_id,
> +	      (void *)mr);
> +	mr_rebuild_dev_cache(dev);
> +	/*
> +	 * Flush local caches by propagating invalidation across cores.
> +	 * rte_smp_wmb() is enough to synchronize this event. If one of
> +	 * the freed memsegs is seen by another core, that means the memseg
> +	 * has been allocated by the allocator, which will come after this
> +	 * free call. Therefore, this store instruction (incrementing the
> +	 * generation below) is guaranteed to be seen by the other core
> +	 * before that core sees the newly allocated memory.
> +	 */
> +	++priv->mr.dev_gen;
> +	DEBUG("broadcasting local cache flush, gen=%d",
> +	      priv->mr.dev_gen);
> +	rte_smp_wmb();
> +	rte_rwlock_read_unlock(&priv->mr.rwlock);
> +	return 0;
> +}
> +
> +/**
>   * Register MR for entire memory chunks in a Mempool having externally
>   * allocated memory and fill in local cache.
>   *
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index c2529f96bc..f3f84dbac3 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -28,6 +28,7 @@
>  #include <rte_atomic.h>
>  #include <rte_spinlock.h>
>  #include <rte_io.h>
> +#include <rte_bus_pci.h>
>
>  #include "mlx5_utils.h"
>  #include "mlx5.h"
> @@ -367,6 +368,10 @@ uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
>  uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb);
>  uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
>  			       struct rte_mempool *mp);
> +int mlx5_dma_map(struct rte_pci_device *pdev, void *addr, uint64_t iova,
> +		 size_t len);
> +int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
> +		   size_t len);
>
>  /**
>   * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
> --
> 2.12.0
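
For anyone wanting to exercise these callbacks end to end, the
application-side flow would look roughly like the sketch below. It is
untested: rte_dev_dma_map() stands in for whatever generic entry point
this series settles on, map_extmem() is a made-up helper name, and error
handling is trimmed:

	#include <rte_dev.h>
	#include <rte_ethdev.h>
	#include <rte_memory.h>

	static int
	map_extmem(uint16_t port_id, void *addr, size_t len, size_t pgsz)
	{
		struct rte_eth_dev_info info;

		/*
		 * Register the external area with EAL first. The NULL
		 * IOVA table leaves page IOVAs unset; real code passes
		 * them explicitly or runs in IOVA-as-VA mode.
		 */
		if (rte_extmem_register(addr, len, NULL, 0, pgsz) != 0)
			return -1;
		rte_eth_dev_info_get(port_id, &info);
		/*
		 * DMA-map the area for this device; the PCI bus layer
		 * dispatches this to the driver's .map callback,
		 * i.e. mlx5_dma_map() above.
		 */
		return rte_dev_dma_map(info.device, addr,
				       (uint64_t)(uintptr_t)addr, len);
	}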
--
Gaëtan Rivet
6WIND