On Wed, Sep 04, 2019 at 08:56:32AM +0200, Maxime Coquelin wrote: > On 9/4/19 6:06 AM, Jason Wang wrote: > > On 2019/9/3 下午4:49, Tiwei Bie wrote: > >> On Tue, Sep 03, 2019 at 09:40:25AM +0200, Maxime Coquelin wrote: > >>> On 9/3/19 7:30 AM, Tiwei Bie wrote: > >>>> On Thu, Aug 29, 2019 at 09:59:55AM +0200, Maxime Coquelin wrote: > >>>>> In order to support multi-queue, we need to implement the control > >>>>> path. The problem is that both the Vhost-user master and slave use > >>>>> VAs in their processes address spaces as IOVAs, which creates > >>>>> collusions between the data rings IOVAs managed the master, and > >>>>> the Control ring IOVAs. The trick here is to remmap the Control > >>>>> ring memory to another range, after the slave is aware of master's > >>>>> ranges. > >>>>> > >>>>> Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com> > >>>>> --- > >>>>> drivers/net/virtio/virtio_vdpa.c | 255 > >>>>> +++++++++++++++++++++++++++++++ > >>>>> 1 file changed, 255 insertions(+) > >>>>> > >>>>> diff --git a/drivers/net/virtio/virtio_vdpa.c > >>>>> b/drivers/net/virtio/virtio_vdpa.c > >>>>> index fc52a8e92..13b4dd07d 100644 > >>>>> --- a/drivers/net/virtio/virtio_vdpa.c > >>>>> +++ b/drivers/net/virtio/virtio_vdpa.c > >>>>> @@ -106,6 +106,127 @@ find_internal_resource_by_dev(struct > >>>>> rte_pci_device *pdev) > >>>>> return list; > >>>>> } > >>>>> +static int > >>>>> +virtio_vdpa_dma_map_ctrl_queue(struct virtio_vdpa_device *dev, int > >>>>> do_map, > >>>>> + uint64_t iova) > >>>>> +{ > >>>>> + const struct rte_memzone *mz; > >>>>> + int ret; > >>>>> + > >>>>> + /* > >>>>> + * IOVAs are processes VAs. We cannot use them as the Data and > >>>>> Control > >>>>> + * paths are run in different processes, which may (does) lead to > >>>>> + * collusions. The trick here is to fixup Ctrl path IOVAs so > >>>>> that they > >>>>> + * start after the Data path ranges. > >>>>> + */ > >>>>> + if (do_map) { > >>>>> + mz = dev->cvq->cq.mz; > >>>>> + ret = rte_vfio_container_dma_map(dev->vfio_container_fd, > >>>>> + (uint64_t)(uintptr_t)mz->addr, > >>>>> + iova, mz->len); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "Failed to map ctrl ring (%d)", ret); > >>>>> + return ret; > >>>>> + } > >>>>> + > >>>>> + dev->cvq->vq_ring_mem = iova; > >>>>> + iova += mz->len; > >>>>> + > >>>>> + mz = dev->cvq->cq.virtio_net_hdr_mz; > >>>>> + ret = rte_vfio_container_dma_map(dev->vfio_container_fd, > >>>>> + (uint64_t)(uintptr_t)mz->addr, > >>>>> + iova, mz->len); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "Failed to map ctrl headers (%d)", ret); > >>>>> + return ret; > >>>>> + } > >>>> This will allow guest to access the cq.mz and cq.virtio_net_hdr_mz > >>>> via the device which may have potential risks. > >>> I get what you mean, but I'm not sure to see how we could avoid that. > >>> AFAIU, we need to map the control queue in the device IOMMU, otherwise > >>> how could the host (in case of virtual device) or the NIC (in case of > >>> Virtio offload), could access the ring? > >>> Any thoughts? > >> I also don't see a way to avoid that. That's why I said in below > >> thread that I think the control queue based interface seems not a > >> quite good interface for a backend device: > >> > >> https://lkml.org/lkml/2019/9/2/934 > >> > >> In IFCVF NIC, we added a MMIO based interface to replace control > >> queue for the multiqueue setup in vDPA mode. > >> > >> Jason is proposing some changes to make virtio device suitable > >> for backend device. I'm not sure whether it's possible to cover > >> this case as well.. > > > > > > A silly question, can we do dynamic mapping like what kernel driver did > > here? > > Not silly at all, it is of course possible.
+1. It's a good idea to mitigate the risks (if possible, we should make the Rx/Tx held while cvq is being used, or try to make cvq's iova unpredictable each time from guest side). > I will implement that in my v2. Thanks! Tiwei > > Thanks! > Maxime > > > Thanks > > > > > >> > >> Regards, > >> Tiwei > >> > >>> Thanks, > >>> Maxime > >>>> Regards, > >>>> Tiwei > >>>> > >>>>> + > >>>>> + dev->cvq->cq.virtio_net_hdr_mem = iova; > >>>>> + } else { > >>>>> + mz = dev->cvq->cq.mz; > >>>>> + ret = rte_vfio_container_dma_unmap(dev->vfio_container_fd, > >>>>> + (uint64_t)(uintptr_t)mz->addr, > >>>>> + iova, mz->len); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "Failed to unmap ctrl ring (%d)", ret); > >>>>> + return ret; > >>>>> + } > >>>>> + > >>>>> + dev->cvq->vq_ring_mem = 0; > >>>>> + iova += mz->len; > >>>>> + > >>>>> + mz = dev->cvq->cq.virtio_net_hdr_mz; > >>>>> + ret = rte_vfio_container_dma_unmap(dev->vfio_container_fd, > >>>>> + (uint64_t)(uintptr_t)mz->addr, > >>>>> + iova, mz->len); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "Failed to unmap ctrl headers (%d)", ret); > >>>>> + return ret; > >>>>> + } > >>>>> + > >>>>> + dev->cvq->cq.virtio_net_hdr_mem = 0; > >>>>> + } > >>>>> + > >>>>> + return 0; > >>>>> +} > >>>>> + > >>>>> +static int > >>>>> +virtio_vdpa_dma_map(struct virtio_vdpa_device *dev, int do_map) > >>>>> +{ > >>>>> + uint32_t i; > >>>>> + int ret; > >>>>> + struct rte_vhost_memory *mem = NULL; > >>>>> + int vfio_container_fd; > >>>>> + uint64_t avail_iova = 0; > >>>>> + > >>>>> + ret = rte_vhost_get_mem_table(dev->vid, &mem); > >>>>> + if (ret < 0 || !mem) { > >>>>> + DRV_LOG(ERR, "failed to get VM memory layout."); > >>>>> + return ret; > >>>>> + } > >>>>> + > >>>>> + vfio_container_fd = dev->vfio_container_fd; > >>>>> + > >>>>> + for (i = 0; i < mem->nregions; i++) { > >>>>> + struct rte_vhost_mem_region *reg; > >>>>> + > >>>>> + reg = &mem->regions[i]; > >>>>> + DRV_LOG(INFO, "%s, region %u: HVA 0x%" PRIx64 ", " > >>>>> + "GPA 0x%" PRIx64 ", size 0x%" PRIx64 ".", > >>>>> + do_map ? "DMA map" : "DMA unmap", i, > >>>>> + reg->host_user_addr, reg->guest_phys_addr, reg->size); > >>>>> + > >>>>> + if (reg->guest_phys_addr + reg->size > avail_iova) > >>>>> + avail_iova = reg->guest_phys_addr + reg->size; > >>>>> + > >>>>> + if (do_map) { > >>>>> + ret = rte_vfio_container_dma_map(vfio_container_fd, > >>>>> + reg->host_user_addr, reg->guest_phys_addr, > >>>>> + reg->size); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "DMA map failed."); > >>>>> + goto exit; > >>>>> + } > >>>>> + } else { > >>>>> + ret = rte_vfio_container_dma_unmap(vfio_container_fd, > >>>>> + reg->host_user_addr, reg->guest_phys_addr, > >>>>> + reg->size); > >>>>> + if (ret < 0) { > >>>>> + DRV_LOG(ERR, "DMA unmap failed."); > >>>>> + goto exit; > >>>>> + } > >>>>> + } > >>>>> + } > >>>>> + > >>>>> + if (dev->cvq) > >>>>> + ret = virtio_vdpa_dma_map_ctrl_queue(dev, do_map, > >>>>> avail_iova); > >>>>> + > >>>>> +exit: > >>>>> + free(mem); > >>>>> + > >>>>> + return ret; > >>>>> +} > >>>>> + > >>>>> static int > >>>>> virtio_vdpa_vfio_setup(struct virtio_vdpa_device *dev) > >>>>> { > >>>>> @@ -216,10 +337,144 @@ virtio_vdpa_get_protocol_features(int did > >>>>> __rte_unused, uint64_t *features) > >>>>> return 0; > >>>>> } > >>>>> +static uint64_t > >>>>> +hva_to_gpa(int vid, uint64_t hva) > >>>>> +{ > >>>>> + struct rte_vhost_memory *mem = NULL; > >>>>> + struct rte_vhost_mem_region *reg; > >>>>> + uint32_t i; > >>>>> + uint64_t gpa = 0; > >>>>> + > >>>>> + if (rte_vhost_get_mem_table(vid, &mem) < 0) > >>>>> + goto exit; > >>>>> + > >>>>> + for (i = 0; i < mem->nregions; i++) { > >>>>> + reg = &mem->regions[i]; > >>>>> + > >>>>> + if (hva >= reg->host_user_addr && > >>>>> + hva < reg->host_user_addr + reg->size) { > >>>>> + gpa = hva - reg->host_user_addr + reg->guest_phys_addr; > >>>>> + break; > >>>>> + } > >>>>> + } > >>>>> + > >>>>> +exit: > >>>>> + if (mem) > >>>>> + free(mem); > >>>>> + return gpa; > >>>>> +} > >>>>> + > >>>>> +static int > >>>>> +virtio_vdpa_start(struct virtio_vdpa_device *dev) > >>>>> +{ > >>>>> + struct virtio_hw *hw = &dev->hw; > >>>>> + int i, vid, nr_vring, ret; > >>>>> + struct rte_vhost_vring vr; > >>>>> + struct virtio_pmd_ctrl ctrl; > >>>>> + int dlen[1]; > >>>>> + > >>>>> + vid = dev->vid; > >>>>> + nr_vring = rte_vhost_get_vring_num(vid); > >>>>> + > >>>>> + if (dev->vqs) > >>>>> + rte_free(dev->vqs); > >>>>> + > >>>>> + dev->vqs = rte_zmalloc("virtio_vdpa", sizeof(*dev->vqs) * > >>>>> nr_vring, 0); > >>>>> + > >>>>> + for (i = 0; i < nr_vring; i++) { > >>>>> + struct virtqueue *vq = &dev->vqs[i]; > >>>>> + > >>>>> + rte_vhost_get_vhost_vring(vid, i, &vr); > >>>>> + > >>>>> + vq->vq_queue_index = i; > >>>>> + vq->vq_nentries = vr.size; > >>>>> + vq->vq_ring_mem = hva_to_gpa(vid, > >>>>> (uint64_t)(uintptr_t)vr.desc); > >>>>> + if (vq->vq_ring_mem == 0) { > >>>>> + DRV_LOG(ERR, "Fail to get GPA for descriptor ring."); > >>>>> + ret = -1; > >>>>> + goto out_free_vqs; > >>>>> + } > >>>>> + > >>>>> + ret = VTPCI_OPS(hw)->setup_queue(hw, vq); > >>>>> + if (ret) { > >>>>> + DRV_LOG(ERR, "Fail to setup queue."); > >>>>> + goto out_free_vqs; > >>>>> + } > >>>>> + } > >>>>> + > >>>>> + if (dev->cvq) { > >>>>> + ret = VTPCI_OPS(hw)->setup_queue(hw, dev->cvq); > >>>>> + if (ret) { > >>>>> + DRV_LOG(ERR, "Fail to setup ctrl queue."); > >>>>> + goto out_free_vqs; > >>>>> + } > >>>>> + } > >>>>> + > >>>>> + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK); > >>>>> + > >>>>> + if (!dev->cvq) > >>>>> + return 0; > >>>>> + > >>>>> + ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; > >>>>> + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; > >>>>> + memcpy(ctrl.data, &dev->max_queue_pairs, sizeof(uint16_t)); > >>>>> + > >>>>> + dlen[0] = sizeof(uint16_t); > >>>>> + > >>>>> + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); > >>>>> + if (ret) { > >>>>> + DRV_LOG(ERR, "Multiqueue configured but send command " > >>>>> + "failed, this is too late now..."); > >>>>> + ret = -EINVAL; > >>>>> + goto out_free_vqs; > >>>>> + } > >>>>> + > >>>>> + return 0; > >>>>> +out_free_vqs: > >>>>> + rte_free(dev->vqs); > >>>>> + > >>>>> + return ret; > >>>>> +} > >>>>> + > >>>>> +static int > >>>>> +virtio_vdpa_dev_config(int vid) > >>>>> +{ > >>>>> + int did, ret; > >>>>> + struct internal_list *list; > >>>>> + struct virtio_vdpa_device *dev; > >>>>> + > >>>>> + did = rte_vhost_get_vdpa_device_id(vid); > >>>>> + list = find_internal_resource_by_did(did); > >>>>> + if (list == NULL) { > >>>>> + DRV_LOG(ERR, "Invalid device id: %d", did); > >>>>> + return -1; > >>>>> + } > >>>>> + > >>>>> + dev = list->dev; > >>>>> + dev->vid = vid; > >>>>> + > >>>>> + rte_spinlock_lock(&dev->lock); > >>>>> + > >>>>> + ret = virtio_vdpa_dma_map(dev, 1); > >>>>> + if (ret) > >>>>> + goto out_unlock; > >>>>> + > >>>>> + ret = virtio_vdpa_start(dev); > >>>>> + > >>>>> + if (rte_vhost_host_notifier_ctrl(vid, true) != 0) > >>>>> + DRV_LOG(NOTICE, "vDPA (%d): software relay is used.", did); > >>>>> + > >>>>> +out_unlock: > >>>>> + rte_spinlock_unlock(&dev->lock); > >>>>> + > >>>>> + return ret; > >>>>> +} > >>>>> + > >>>>> static struct rte_vdpa_dev_ops virtio_vdpa_ops = { > >>>>> .get_queue_num = virtio_vdpa_get_queue_num, > >>>>> .get_features = virtio_vdpa_get_features, > >>>>> .get_protocol_features = virtio_vdpa_get_protocol_features, > >>>>> + .dev_conf = virtio_vdpa_dev_config, > >>>>> }; > >>>>> static inline int > >>>>> -- > >>>>> 2.21.0 > >>>>>