Add the vDPA datapath update logic. Signed-off-by: Chaoyong He <chaoyong...@corigine.com> Signed-off-by: Shujing Dong <shujing.d...@corigine.com> Reviewed-by: Long Wu <long...@corigine.com> Reviewed-by: Peng Zhang <peng.zh...@corigine.com> --- drivers/common/nfp/nfp_common_ctrl.h | 1 + drivers/vdpa/nfp/nfp_vdpa.c | 315 +++++++++++++++++++++++++++ drivers/vdpa/nfp/nfp_vdpa_core.c | 78 +++++++ drivers/vdpa/nfp/nfp_vdpa_core.h | 15 ++ 4 files changed, 409 insertions(+)
diff --git a/drivers/common/nfp/nfp_common_ctrl.h b/drivers/common/nfp/nfp_common_ctrl.h index 3c8cd916cf..f92ce50fc0 100644 --- a/drivers/common/nfp/nfp_common_ctrl.h +++ b/drivers/common/nfp/nfp_common_ctrl.h @@ -238,6 +238,7 @@ struct nfp_net_fw_ver { #define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /**< IPsec offload */ #define NFP_NET_CFG_CTRL_IPSEC_SM_LOOKUP (0x1 << 3) /**< SA short match lookup */ #define NFP_NET_CFG_CTRL_IPSEC_LM_LOOKUP (0x1 << 4) /**< SA long match lookup */ +#define NFP_NET_CFG_CTRL_IN_ORDER (0x1 << 11) /**< Virtio in-order flag */ #define NFP_NET_CFG_CAP_WORD1 0x00a4 diff --git a/drivers/vdpa/nfp/nfp_vdpa.c b/drivers/vdpa/nfp/nfp_vdpa.c index 00d8f7e007..465ee4841d 100644 --- a/drivers/vdpa/nfp/nfp_vdpa.c +++ b/drivers/vdpa/nfp/nfp_vdpa.c @@ -4,6 +4,7 @@ */ #include <pthread.h> +#include <sys/ioctl.h> #include <nfp_common_pci.h> #include <nfp_dev.h> @@ -15,6 +16,9 @@ #define NFP_VDPA_DRIVER_NAME nfp_vdpa +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ + sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1)) + struct nfp_vdpa_dev { struct rte_pci_device *pci_dev; struct rte_vdpa_device *vdev; @@ -25,7 +29,15 @@ struct nfp_vdpa_dev { int vfio_dev_fd; int iommu_group; + int vid; uint16_t max_queues; + uint32_t started; + uint32_t dev_attached; + uint32_t running; + rte_spinlock_t lock; + + /** Eventfd for used ring interrupt */ + int intr_fd[NFP_VDPA_MAX_QUEUES * 2]; }; struct nfp_vdpa_dev_node { @@ -112,6 +124,302 @@ nfp_vdpa_vfio_teardown(struct nfp_vdpa_dev *device) rte_vfio_container_destroy(device->vfio_container_fd); } +static int +nfp_vdpa_dma_do_unmap(struct rte_vhost_memory *mem, + uint32_t times, + int vfio_container_fd) +{ + uint32_t i; + int ret = 0; + struct rte_vhost_mem_region *region; + + for (i = 0; i < times; i++) { + region = &mem->regions[i]; + + ret = rte_vfio_container_dma_unmap(vfio_container_fd, + region->host_user_addr, region->guest_phys_addr, + region->size); + if (ret < 0) { + /* Here should not return, even error 
happened. */ + DRV_VDPA_LOG(ERR, "DMA unmap failed. Times: %u", i); + } + } + + return ret; +} + +static int +nfp_vdpa_dma_do_map(struct rte_vhost_memory *mem, + uint32_t times, + int vfio_container_fd) +{ + int ret; + uint32_t i; + struct rte_vhost_mem_region *region; + + for (i = 0; i < times; i++) { + region = &mem->regions[i]; + + ret = rte_vfio_container_dma_map(vfio_container_fd, + region->host_user_addr, region->guest_phys_addr, + region->size); + if (ret < 0) { + DRV_VDPA_LOG(ERR, "DMA map failed."); + nfp_vdpa_dma_do_unmap(mem, i, vfio_container_fd); + return ret; + } + } + + return 0; +} + +static int +nfp_vdpa_dma_map(struct nfp_vdpa_dev *device, + bool do_map) +{ + int ret; + int vfio_container_fd; + struct rte_vhost_memory *mem = NULL; + + ret = rte_vhost_get_mem_table(device->vid, &mem); + if (ret < 0) { + DRV_VDPA_LOG(ERR, "Failed to get memory layout."); + return ret; + } + + vfio_container_fd = device->vfio_container_fd; + DRV_VDPA_LOG(DEBUG, "vfio_container_fd %d", vfio_container_fd); + + if (do_map) + ret = nfp_vdpa_dma_do_map(mem, mem->nregions, vfio_container_fd); + else + ret = nfp_vdpa_dma_do_unmap(mem, mem->nregions, vfio_container_fd); + + free(mem); + + return ret; +} + +static uint64_t +nfp_vdpa_qva_to_gpa(int vid, + uint64_t qva) +{ + int ret; + uint32_t i; + uint64_t gpa = 0; + struct rte_vhost_memory *mem = NULL; + struct rte_vhost_mem_region *region; + + ret = rte_vhost_get_mem_table(vid, &mem); + if (ret < 0) { + DRV_VDPA_LOG(ERR, "Failed to get memory layout."); + return gpa; + } + + for (i = 0; i < mem->nregions; i++) { + region = &mem->regions[i]; + + if (qva >= region->host_user_addr && + qva < region->host_user_addr + region->size) { + gpa = qva - region->host_user_addr + region->guest_phys_addr; + break; + } + } + + free(mem); + + return gpa; +} + +static int +nfp_vdpa_start(struct nfp_vdpa_dev *device) +{ + int ret; + int vid; + uint16_t i; + uint64_t gpa; + struct rte_vhost_vring vring; + struct nfp_vdpa_hw *vdpa_hw = 
&device->hw; + + vid = device->vid; + vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid); + + ret = rte_vhost_get_negotiated_features(vid, &vdpa_hw->req_features); + if (ret != 0) + return ret; + + for (i = 0; i < vdpa_hw->nr_vring; i++) { + ret = rte_vhost_get_vhost_vring(vid, i, &vring); + if (ret != 0) + return ret; + + gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc); + if (gpa == 0) { + DRV_VDPA_LOG(ERR, "Fail to get GPA for descriptor ring."); + return -1; + } + + vdpa_hw->vring[i].desc = gpa; + + gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail); + if (gpa == 0) { + DRV_VDPA_LOG(ERR, "Fail to get GPA for available ring."); + return -1; + } + + vdpa_hw->vring[i].avail = gpa; + + gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used); + if (gpa == 0) { + DRV_VDPA_LOG(ERR, "Fail to get GPA for used ring."); + return -1; + } + + vdpa_hw->vring[i].used = gpa; + + vdpa_hw->vring[i].size = vring.size; + + ret = rte_vhost_get_vring_base(vid, i, + &vdpa_hw->vring[i].last_avail_idx, + &vdpa_hw->vring[i].last_used_idx); + if (ret != 0) + return ret; + } + + return nfp_vdpa_hw_start(&device->hw, vid); +} + +static void +nfp_vdpa_stop(struct nfp_vdpa_dev *device) +{ + int vid; + uint32_t i; + struct nfp_vdpa_hw *vdpa_hw = &device->hw; + + nfp_vdpa_hw_stop(vdpa_hw); + + vid = device->vid; + for (i = 0; i < vdpa_hw->nr_vring; i++) + rte_vhost_set_vring_base(vid, i, + vdpa_hw->vring[i].last_avail_idx, + vdpa_hw->vring[i].last_used_idx); +} + +static int +nfp_vdpa_enable_vfio_intr(struct nfp_vdpa_dev *device) +{ + int ret; + uint16_t i; + int *fd_ptr; + uint16_t nr_vring; + struct vfio_irq_set *irq_set; + struct rte_vhost_vring vring; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + + nr_vring = rte_vhost_get_vring_num(device->vid); + + irq_set = (struct vfio_irq_set *)irq_set_buf; + irq_set->argsz = sizeof(irq_set_buf); + irq_set->count = nr_vring + 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + 
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + fd_ptr = (int *)&irq_set->data; + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(device->pci_dev->intr_handle); + + for (i = 0; i < nr_vring; i++) + device->intr_fd[i] = -1; + + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(device->vid, i, &vring); + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + } + + ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret != 0) { + DRV_VDPA_LOG(ERR, "Error enabling MSI-X interrupts."); + return -EIO; + } + + return 0; +} + +static int +nfp_vdpa_disable_vfio_intr(struct nfp_vdpa_dev *device) +{ + int ret; + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + + irq_set = (struct vfio_irq_set *)irq_set_buf; + irq_set->argsz = sizeof(irq_set_buf); + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret != 0) { + DRV_VDPA_LOG(ERR, "Error disabling MSI-X interrupts."); + return -EIO; + } + + return 0; +} + +static int +update_datapath(struct nfp_vdpa_dev *device) +{ + int ret; + + rte_spinlock_lock(&device->lock); + + if ((__atomic_load_n(&device->running, __ATOMIC_RELAXED) == 0) && + (__atomic_load_n(&device->started, __ATOMIC_RELAXED) != 0) && + (__atomic_load_n(&device->dev_attached, __ATOMIC_RELAXED) != 0)) { + ret = nfp_vdpa_dma_map(device, true); + if (ret != 0) + goto unlock_exit; + + ret = nfp_vdpa_enable_vfio_intr(device); + if (ret != 0) + goto dma_map_rollback; + + ret = nfp_vdpa_start(device); + if (ret != 0) + goto disable_vfio_intr; + + __atomic_store_n(&device->running, 1, __ATOMIC_RELAXED); + } else if ((__atomic_load_n(&device->running, __ATOMIC_RELAXED) != 0) && + ((__atomic_load_n(&device->started, __ATOMIC_RELAXED) == 0) || + (__atomic_load_n(&device->dev_attached, __ATOMIC_RELAXED) == 0))) { + + 
nfp_vdpa_stop(device); + + ret = nfp_vdpa_disable_vfio_intr(device); + if (ret != 0) + goto unlock_exit; + + ret = nfp_vdpa_dma_map(device, false); + if (ret != 0) + goto unlock_exit; + + __atomic_store_n(&device->running, 0, __ATOMIC_RELAXED); + } + + rte_spinlock_unlock(&device->lock); + return 0; + +disable_vfio_intr: + nfp_vdpa_disable_vfio_intr(device); +dma_map_rollback: + nfp_vdpa_dma_map(device, false); +unlock_exit: + rte_spinlock_unlock(&device->lock); + return ret; +} + struct rte_vdpa_dev_ops nfp_vdpa_ops = { }; @@ -156,6 +464,10 @@ nfp_vdpa_pci_probe(struct rte_pci_device *pci_dev) TAILQ_INSERT_TAIL(&vdpa_dev_list, node, next); pthread_mutex_unlock(&vdpa_list_lock); + rte_spinlock_init(&device->lock); + __atomic_store_n(&device->started, 1, __ATOMIC_RELAXED); + update_datapath(device); + return 0; vfio_teardown: @@ -185,6 +497,9 @@ nfp_vdpa_pci_remove(struct rte_pci_device *pci_dev) device = node->device; + __atomic_store_n(&device->started, 0, __ATOMIC_RELAXED); + update_datapath(device); + pthread_mutex_lock(&vdpa_list_lock); TAILQ_REMOVE(&vdpa_dev_list, node, next); pthread_mutex_unlock(&vdpa_list_lock); diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.c b/drivers/vdpa/nfp/nfp_vdpa_core.c index a7e15fa88a..db9b8462b4 100644 --- a/drivers/vdpa/nfp/nfp_vdpa_core.c +++ b/drivers/vdpa/nfp/nfp_vdpa_core.c @@ -5,6 +5,7 @@ #include "nfp_vdpa_core.h" +#include <nfp_common.h> #include <rte_vhost.h> #include "nfp_vdpa_log.h" @@ -52,3 +53,80 @@ nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw, return 0; } + +static uint32_t +nfp_vdpa_check_offloads(void) +{ + return NFP_NET_CFG_CTRL_SCATTER | + NFP_NET_CFG_CTRL_IN_ORDER; +} + +int +nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw, + int vid) +{ + int ret; + uint32_t update; + uint32_t new_ctrl; + struct timespec wait_tst; + struct nfp_hw *hw = &vdpa_hw->super; + uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; + + nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(0), vdpa_hw->vring[1].desc); + nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(0), 
rte_log2_u32(vdpa_hw->vring[1].size)); + nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(1), vdpa_hw->vring[1].avail); + nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(2), vdpa_hw->vring[1].used); + + nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(0), vdpa_hw->vring[0].desc); + nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(0), rte_log2_u32(vdpa_hw->vring[0].size)); + nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(1), vdpa_hw->vring[0].avail); + nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(2), vdpa_hw->vring[0].used); + + rte_wmb(); + + nfp_disable_queues(hw); + nfp_enable_queues(hw, NFP_VDPA_MAX_QUEUES, NFP_VDPA_MAX_QUEUES); + + new_ctrl = nfp_vdpa_check_offloads(); + + nn_cfg_writel(hw, NFP_NET_CFG_MTU, 9216); + nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, 10240); + + /* TODO: Temporary set MAC to fixed value fe:1b:ac:05:a5:22 */ + mac_addr[0] = 0xfe; + mac_addr[1] = 0x1b; + mac_addr[2] = 0xac; + mac_addr[3] = 0x05; + mac_addr[4] = 0xa5; + mac_addr[5] = (0x22 + vid); + + /* Writing new MAC to the specific port BAR address */ + nfp_write_mac(hw, (uint8_t *)mac_addr); + + /* Enable device */ + new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; + + /* Signal the NIC about the change */ + update = NFP_NET_CFG_UPDATE_MACADDR | + NFP_NET_CFG_UPDATE_GEN | + NFP_NET_CFG_UPDATE_RING; + + ret = nfp_reconfig(hw, new_ctrl, update); + if (ret < 0) + return -EIO; + + hw->ctrl = new_ctrl; + + DRV_CORE_LOG(DEBUG, "Enabling the device, sleep 1 seconds..."); + wait_tst.tv_sec = 1; + wait_tst.tv_nsec = 0; + nanosleep(&wait_tst, 0); + + return 0; +} + +void +nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw) +{ + nfp_disable_queues(&vdpa_hw->super); +} diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.h b/drivers/vdpa/nfp/nfp_vdpa_core.h index c9403e0ea4..a88de768dd 100644 --- a/drivers/vdpa/nfp/nfp_vdpa_core.h +++ b/drivers/vdpa/nfp/nfp_vdpa_core.h @@ -15,6 +15,15 @@ #define NFP_VDPA_NOTIFY_ADDR_BASE 0x4000 #define NFP_VDPA_NOTIFY_ADDR_INTERVAL 0x1000 +struct nfp_vdpa_vring { + uint64_t desc; + uint64_t avail; + uint64_t used; + uint16_t size; + uint16_t 
last_avail_idx; + uint16_t last_used_idx; +}; + struct nfp_vdpa_hw { struct nfp_hw super; @@ -22,11 +31,17 @@ struct nfp_vdpa_hw { uint64_t req_features; uint8_t *notify_addr[NFP_VDPA_MAX_QUEUES * 2]; + struct nfp_vdpa_vring vring[NFP_VDPA_MAX_QUEUES * 2]; uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; uint8_t notify_region; + uint8_t nr_vring; }; int nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw, struct rte_pci_device *dev); +int nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw, int vid); + +void nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw); + #endif /* __NFP_VDPA_CORE_H__ */ -- 2.39.1