On Wed, Sep 04, 2019 at 08:56:32AM +0200, Maxime Coquelin wrote:
> On 9/4/19 6:06 AM, Jason Wang wrote:
> > On 2019/9/3 下午4:49, Tiwei Bie wrote:
> >> On Tue, Sep 03, 2019 at 09:40:25AM +0200, Maxime Coquelin wrote:
> >>> On 9/3/19 7:30 AM, Tiwei Bie wrote:
> >>>> On Thu, Aug 29, 2019 at 09:59:55AM +0200, Maxime Coquelin wrote:
> >>>>> In order to support multi-queue, we need to implement the control
> >>>>> path. The problem is that both the Vhost-user master and slave use
> >>>>> VAs in their processes' address spaces as IOVAs, which creates
> >>>>> collisions between the data ring IOVAs managed by the master and
> >>>>> the Control ring IOVAs. The trick here is to remap the Control
> >>>>> ring memory to another range, after the slave is aware of the
> >>>>> master's ranges.
> >>>>>
> >>>>> Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com>
> >>>>> ---
> >>>>>   drivers/net/virtio/virtio_vdpa.c | 255
> >>>>> +++++++++++++++++++++++++++++++
> >>>>>   1 file changed, 255 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/net/virtio/virtio_vdpa.c
> >>>>> b/drivers/net/virtio/virtio_vdpa.c
> >>>>> index fc52a8e92..13b4dd07d 100644
> >>>>> --- a/drivers/net/virtio/virtio_vdpa.c
> >>>>> +++ b/drivers/net/virtio/virtio_vdpa.c
> >>>>> @@ -106,6 +106,127 @@ find_internal_resource_by_dev(struct
> >>>>> rte_pci_device *pdev)
> >>>>>       return list;
> >>>>>   }
> >>>>>   +static int
> >>>>> +virtio_vdpa_dma_map_ctrl_queue(struct virtio_vdpa_device *dev, int
> >>>>> do_map,
> >>>>> +        uint64_t iova)
> >>>>> +{
> >>>>> +    const struct rte_memzone *mz;
> >>>>> +    int ret;
> >>>>> +
> >>>>> +    /*
> >>>>> +     * IOVAs are processes VAs. We cannot use them as the Data and
> >>>>> Control
> >>>>> +     * paths are run in different processes, which may (does) lead to
> >>>>> +     * collusions. The trick here is to fixup Ctrl path IOVAs so
> >>>>> that they
> >>>>> +     * start after the Data path ranges.
> >>>>> +     */
> >>>>> +    if (do_map) {
> >>>>> +        mz = dev->cvq->cq.mz;
> >>>>> +        ret = rte_vfio_container_dma_map(dev->vfio_container_fd,
> >>>>> +                (uint64_t)(uintptr_t)mz->addr,
> >>>>> +                iova, mz->len);
> >>>>> +        if (ret < 0) {
> >>>>> +            DRV_LOG(ERR, "Failed to map ctrl ring (%d)", ret);
> >>>>> +            return ret;
> >>>>> +        }
> >>>>> +
> >>>>> +        dev->cvq->vq_ring_mem = iova;
> >>>>> +        iova += mz->len;
> >>>>> +
> >>>>> +        mz = dev->cvq->cq.virtio_net_hdr_mz;
> >>>>> +        ret = rte_vfio_container_dma_map(dev->vfio_container_fd,
> >>>>> +                (uint64_t)(uintptr_t)mz->addr,
> >>>>> +                iova, mz->len);
> >>>>> +        if (ret < 0) {
> >>>>> +            DRV_LOG(ERR, "Failed to map ctrl headers (%d)", ret);
> >>>>> +            return ret;
> >>>>> +        }
> >>>> This will allow the guest to access the cq.mz and cq.virtio_net_hdr_mz
> >>>> via the device, which may pose potential risks.
> >>> I get what you mean, but I'm not sure I see how we could avoid that.
> >>> AFAIU, we need to map the control queue in the device IOMMU; otherwise,
> >>> how could the host (in the case of a virtual device) or the NIC (in the
> >>> case of Virtio offload) access the ring?
> >>> Any thoughts?
> >> I also don't see a way to avoid that. That's why I said in the thread
> >> below that I think the control-queue-based interface does not seem to
> >> be a very good interface for a backend device:
> >>
> >> https://lkml.org/lkml/2019/9/2/934
> >>
> >> In the IFCVF NIC, we added an MMIO-based interface to replace the
> >> control queue for the multiqueue setup in vDPA mode.
> >>
> >> Jason is proposing some changes to make virtio device suitable
> >> for backend device. I'm not sure whether it's possible to cover
> >> this case as well..
> > 
> > 
> > A silly question: can we do dynamic mapping like what the kernel driver
> > did here?
> 
> Not silly at all, it is of course possible.

+1. It's a good idea to mitigate the risks (if possible, we should
keep Rx/Tx on hold while the cvq is being used, or try to make the cvq's
IOVA unpredictable from the guest side each time).

> I will implement that in my v2.

Thanks!
Tiwei

> 
> Thanks!
> Maxime
> 
> > Thanks
> > 
> > 
> >>
> >> Regards,
> >> Tiwei
> >>
> >>> Thanks,
> >>> Maxime
> >>>> Regards,
> >>>> Tiwei
> >>>>
> >>>>> +
> >>>>> +        dev->cvq->cq.virtio_net_hdr_mem = iova;
> >>>>> +    } else {
> >>>>> +        mz = dev->cvq->cq.mz;
> >>>>> +        ret = rte_vfio_container_dma_unmap(dev->vfio_container_fd,
> >>>>> +                (uint64_t)(uintptr_t)mz->addr,
> >>>>> +                iova, mz->len);
> >>>>> +        if (ret < 0) {
> >>>>> +            DRV_LOG(ERR, "Failed to unmap ctrl ring (%d)", ret);
> >>>>> +            return ret;
> >>>>> +        }
> >>>>> +
> >>>>> +        dev->cvq->vq_ring_mem = 0;
> >>>>> +        iova += mz->len;
> >>>>> +
> >>>>> +        mz = dev->cvq->cq.virtio_net_hdr_mz;
> >>>>> +        ret = rte_vfio_container_dma_unmap(dev->vfio_container_fd,
> >>>>> +                (uint64_t)(uintptr_t)mz->addr,
> >>>>> +                iova, mz->len);
> >>>>> +        if (ret < 0) {
> >>>>> +            DRV_LOG(ERR, "Failed to unmap ctrl headers (%d)", ret);
> >>>>> +            return ret;
> >>>>> +        }
> >>>>> +
> >>>>> +        dev->cvq->cq.virtio_net_hdr_mem = 0;
> >>>>> +    }
> >>>>> +
> >>>>> +    return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static int
> >>>>> +virtio_vdpa_dma_map(struct virtio_vdpa_device *dev, int do_map)
> >>>>> +{
> >>>>> +    uint32_t i;
> >>>>> +    int ret;
> >>>>> +    struct rte_vhost_memory *mem = NULL;
> >>>>> +    int vfio_container_fd;
> >>>>> +    uint64_t avail_iova = 0;
> >>>>> +
> >>>>> +    ret = rte_vhost_get_mem_table(dev->vid, &mem);
> >>>>> +    if (ret < 0 || !mem) {
> >>>>> +        DRV_LOG(ERR, "failed to get VM memory layout.");
> >>>>> +        return ret;
> >>>>> +    }
> >>>>> +
> >>>>> +    vfio_container_fd = dev->vfio_container_fd;
> >>>>> +
> >>>>> +    for (i = 0; i < mem->nregions; i++) {
> >>>>> +        struct rte_vhost_mem_region *reg;
> >>>>> +
> >>>>> +        reg = &mem->regions[i];
> >>>>> +        DRV_LOG(INFO, "%s, region %u: HVA 0x%" PRIx64 ", "
> >>>>> +            "GPA 0x%" PRIx64 ", size 0x%" PRIx64 ".",
> >>>>> +            do_map ? "DMA map" : "DMA unmap", i,
> >>>>> +            reg->host_user_addr, reg->guest_phys_addr, reg->size);
> >>>>> +
> >>>>> +        if (reg->guest_phys_addr + reg->size > avail_iova)
> >>>>> +            avail_iova = reg->guest_phys_addr + reg->size;
> >>>>> +
> >>>>> +        if (do_map) {
> >>>>> +            ret = rte_vfio_container_dma_map(vfio_container_fd,
> >>>>> +                reg->host_user_addr, reg->guest_phys_addr,
> >>>>> +                reg->size);
> >>>>> +            if (ret < 0) {
> >>>>> +                DRV_LOG(ERR, "DMA map failed.");
> >>>>> +                goto exit;
> >>>>> +            }
> >>>>> +        } else {
> >>>>> +            ret = rte_vfio_container_dma_unmap(vfio_container_fd,
> >>>>> +                reg->host_user_addr, reg->guest_phys_addr,
> >>>>> +                reg->size);
> >>>>> +            if (ret < 0) {
> >>>>> +                DRV_LOG(ERR, "DMA unmap failed.");
> >>>>> +                goto exit;
> >>>>> +            }
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +    if (dev->cvq)
> >>>>> +        ret = virtio_vdpa_dma_map_ctrl_queue(dev, do_map,
> >>>>> avail_iova);
> >>>>> +
> >>>>> +exit:
> >>>>> +    free(mem);
> >>>>> +
> >>>>> +    return ret;
> >>>>> +}
> >>>>> +
> >>>>>   static int
> >>>>>   virtio_vdpa_vfio_setup(struct virtio_vdpa_device *dev)
> >>>>>   {
> >>>>> @@ -216,10 +337,144 @@ virtio_vdpa_get_protocol_features(int did
> >>>>> __rte_unused, uint64_t *features)
> >>>>>       return 0;
> >>>>>   }
> >>>>>   +static uint64_t
> >>>>> +hva_to_gpa(int vid, uint64_t hva)
> >>>>> +{
> >>>>> +    struct rte_vhost_memory *mem = NULL;
> >>>>> +    struct rte_vhost_mem_region *reg;
> >>>>> +    uint32_t i;
> >>>>> +    uint64_t gpa = 0;
> >>>>> +
> >>>>> +    if (rte_vhost_get_mem_table(vid, &mem) < 0)
> >>>>> +        goto exit;
> >>>>> +
> >>>>> +    for (i = 0; i < mem->nregions; i++) {
> >>>>> +        reg = &mem->regions[i];
> >>>>> +
> >>>>> +        if (hva >= reg->host_user_addr &&
> >>>>> +                hva < reg->host_user_addr + reg->size) {
> >>>>> +            gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
> >>>>> +            break;
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +exit:
> >>>>> +    if (mem)
> >>>>> +        free(mem);
> >>>>> +    return gpa;
> >>>>> +}
> >>>>> +
> >>>>> +static int
> >>>>> +virtio_vdpa_start(struct virtio_vdpa_device *dev)
> >>>>> +{
> >>>>> +    struct virtio_hw *hw = &dev->hw;
> >>>>> +    int i, vid, nr_vring, ret;
> >>>>> +    struct rte_vhost_vring vr;
> >>>>> +    struct virtio_pmd_ctrl ctrl;
> >>>>> +    int dlen[1];
> >>>>> +
> >>>>> +    vid = dev->vid;
> >>>>> +    nr_vring = rte_vhost_get_vring_num(vid);
> >>>>> +
> >>>>> +    if (dev->vqs)
> >>>>> +        rte_free(dev->vqs);
> >>>>> +
> >>>>> +    dev->vqs = rte_zmalloc("virtio_vdpa", sizeof(*dev->vqs) *
> >>>>> nr_vring, 0);
> >>>>> +
> >>>>> +    for (i = 0; i < nr_vring; i++) {
> >>>>> +        struct virtqueue *vq = &dev->vqs[i];
> >>>>> +
> >>>>> +        rte_vhost_get_vhost_vring(vid, i, &vr);
> >>>>> +
> >>>>> +        vq->vq_queue_index = i;
> >>>>> +        vq->vq_nentries = vr.size;
> >>>>> +        vq->vq_ring_mem = hva_to_gpa(vid,
> >>>>> (uint64_t)(uintptr_t)vr.desc);
> >>>>> +        if (vq->vq_ring_mem  == 0) {
> >>>>> +            DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
> >>>>> +            ret = -1;
> >>>>> +            goto out_free_vqs;
> >>>>> +        }
> >>>>> +
> >>>>> +        ret = VTPCI_OPS(hw)->setup_queue(hw, vq);
> >>>>> +        if (ret) {
> >>>>> +            DRV_LOG(ERR, "Fail to setup queue.");
> >>>>> +            goto out_free_vqs;
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +    if (dev->cvq) {
> >>>>> +        ret = VTPCI_OPS(hw)->setup_queue(hw, dev->cvq);
> >>>>> +        if (ret) {
> >>>>> +            DRV_LOG(ERR, "Fail to setup ctrl queue.");
> >>>>> +            goto out_free_vqs;
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +    vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
> >>>>> +
> >>>>> +    if (!dev->cvq)
> >>>>> +        return 0;
> >>>>> +
> >>>>> +    ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
> >>>>> +    ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
> >>>>> +    memcpy(ctrl.data, &dev->max_queue_pairs, sizeof(uint16_t));
> >>>>> +
> >>>>> +    dlen[0] = sizeof(uint16_t);
> >>>>> +
> >>>>> +    ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
> >>>>> +    if (ret) {
> >>>>> +        DRV_LOG(ERR, "Multiqueue configured but send command "
> >>>>> +              "failed, this is too late now...");
> >>>>> +        ret = -EINVAL;
> >>>>> +        goto out_free_vqs;
> >>>>> +    }
> >>>>> +
> >>>>> +    return 0;
> >>>>> +out_free_vqs:
> >>>>> +    rte_free(dev->vqs);
> >>>>> +
> >>>>> +    return ret;
> >>>>> +}
> >>>>> +
> >>>>> +static int
> >>>>> +virtio_vdpa_dev_config(int vid)
> >>>>> +{
> >>>>> +    int did, ret;
> >>>>> +    struct internal_list *list;
> >>>>> +    struct virtio_vdpa_device *dev;
> >>>>> +
> >>>>> +    did = rte_vhost_get_vdpa_device_id(vid);
> >>>>> +    list = find_internal_resource_by_did(did);
> >>>>> +    if (list == NULL) {
> >>>>> +        DRV_LOG(ERR, "Invalid device id: %d", did);
> >>>>> +        return -1;
> >>>>> +    }
> >>>>> +
> >>>>> +    dev = list->dev;
> >>>>> +    dev->vid = vid;
> >>>>> +
> >>>>> +    rte_spinlock_lock(&dev->lock);
> >>>>> +
> >>>>> +    ret = virtio_vdpa_dma_map(dev, 1);
> >>>>> +    if (ret)
> >>>>> +        goto out_unlock;
> >>>>> +
> >>>>> +    ret = virtio_vdpa_start(dev);
> >>>>> +
> >>>>> +    if (rte_vhost_host_notifier_ctrl(vid, true) != 0)
> >>>>> +        DRV_LOG(NOTICE, "vDPA (%d): software relay is used.", did);
> >>>>> +
> >>>>> +out_unlock:
> >>>>> +    rte_spinlock_unlock(&dev->lock);
> >>>>> +
> >>>>> +    return ret;
> >>>>> +}
> >>>>> +
> >>>>>   static struct rte_vdpa_dev_ops virtio_vdpa_ops = {
> >>>>>       .get_queue_num = virtio_vdpa_get_queue_num,
> >>>>>       .get_features = virtio_vdpa_get_features,
> >>>>>       .get_protocol_features = virtio_vdpa_get_protocol_features,
> >>>>> +    .dev_conf = virtio_vdpa_dev_config,
> >>>>>   };
> >>>>>     static inline int
> >>>>> -- 
> >>>>> 2.21.0
> >>>>>

Reply via email to