> -----Original Message-----
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Thursday, October 5, 2017 4:36 PM
> To: dev@dpdk.org; Horton, Remy <remy.hor...@intel.com>; Bie, Tiwei
> <tiwei....@intel.com>; y...@fridaylinux.org
> Cc: m...@redhat.com; jfrei...@redhat.com; vkapl...@redhat.com;
> jasow...@redhat.com; Maxime Coquelin <maxime.coque...@redhat.com>
> Subject: [dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device creation until rings are mapped
> 
> Translating the start addresses of the rings is not enough; we need to be
> sure the whole of each ring is made available by the guest.
>
> That depends on the size of the rings, which is not known at
> SET_VRING_ADDR reception time. Furthermore, we need to be safe against
> vring page invalidations.
> 
> This patch introduces a new access_ok flag per virtqueue, which is set
> when all the rings are mapped, and cleared as soon as a page used by a
> ring is invalidated. The invalidation part is implemented in a following
> patch.
> 
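
If I read the patch correctly, the sequence is: SET_VRING_ADDR caches the
ring addresses and clears access_ok; the first enqueue/dequeue burst then
takes the IOTLB read lock and calls vring_translate(), which sets access_ok
once the desc, avail and used rings have all been translated; and an IOTLB
invalidate touching a ring page clears access_ok again (that part is handled
in a later patch of the series).
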
> Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com>
> ---
>  lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++------------
>  lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++----------------
>  4 files changed, 121 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 0e2ad3322..ef54835a6 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
>       rte_free(dev);
>  }
> 
> +int
> +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> +     uint64_t size;
> +
> +     if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
> +             goto out;
> +
> +     size = sizeof(struct vring_desc) * vq->size;
> +     vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
> +                                             vq->ring_addrs.desc_user_addr,
> +                                             size, VHOST_ACCESS_RW);
> +     if (!vq->desc)
> +             return -1;
> +
> +     size = sizeof(struct vring_avail);
> +     size += sizeof(uint16_t) * vq->size;
> +     vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
> +                                             vq->ring_addrs.avail_user_addr,
> +                                             size, VHOST_ACCESS_RW);
> +     if (!vq->avail)
> +             return -1;
> +
> +     size = sizeof(struct vring_used);
> +     size += sizeof(struct vring_used_elem) * vq->size;
> +     vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
> +                                             vq->ring_addrs.used_user_addr,
> +                                             size, VHOST_ACCESS_RW);
> +     if (!vq->used)
> +             return -1;
> +
> +out:
> +     vq->access_ok = 1;
> +
> +     return 0;
> +}
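
As a rough size example (assuming the standard split-ring layout and a
typical 256-entry virtio-net ring): the desc table above is 16 B * 256 =
4096 B, the avail ring is 4 B + 2 B * 256 = 516 B, and the used ring is
4 B + 8 B * 256 = 2052 B, so a ring can easily straddle guest pages, which
is why the whole size has to be translated and not just the base address.
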
> +
>  static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 903da5db5..b3fe6bb8e 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -113,6 +113,7 @@ struct vhost_virtqueue {
>       /* Currently unused as polling mode is enabled */
>       int                     kickfd;
>       int                     enabled;
> +     int                     access_ok;
> 
>       /* Physical address of used ring, for logging */
>       uint64_t                log_guest_addr;
> @@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
> 
>  uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
>                       uint64_t iova, uint64_t size, uint8_t perm);
> +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
> 
>  static __rte_always_inline uint64_t
>  vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 90b209764..dd6562fd8 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
>        */
>       memcpy(&vq->ring_addrs, addr, sizeof(*addr));
> 
> +     vq->desc = NULL;
> +     vq->avail = NULL;
> +     vq->used = NULL;
> +
> +     vq->access_ok = 0;
> +
>       return 0;
>  }
> 
> @@ -407,10 +413,10 @@ static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
>       vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
>                       vq, addr->desc_user_addr, sizeof(struct vring_desc));
>       if (vq->desc == 0) {
> -             RTE_LOG(ERR, VHOST_CONFIG,
> +             RTE_LOG(DEBUG, VHOST_CONFIG,
>                       "(%d) failed to find desc ring address.\n",
>                       dev->vid);
> -             return NULL;
> +             return dev;
>       }
> 
>       dev = numa_realloc(dev, vq_index);
> @@ -419,19 +425,19 @@ static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
>       vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
>                       vq, addr->avail_user_addr, sizeof(struct vring_avail));
>       if (vq->avail == 0) {
> -             RTE_LOG(ERR, VHOST_CONFIG,
> +             RTE_LOG(DEBUG, VHOST_CONFIG,
>                       "(%d) failed to find avail ring address.\n",
>                       dev->vid);
> -             return NULL;
> +             return dev;
>       }
> 
>       vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
>                       vq, addr->used_user_addr, sizeof(struct vring_used));
>       if (vq->used == 0) {
> -             RTE_LOG(ERR, VHOST_CONFIG,
> +             RTE_LOG(DEBUG, VHOST_CONFIG,
>                       "(%d) failed to find used ring address.\n",
>                       dev->vid);
> -             return NULL;
> +             return dev;
>       }
> 
>       if (vq->last_used_idx != vq->used->idx) {
> @@ -677,7 +683,7 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  static int
>  vq_is_ready(struct vhost_virtqueue *vq)
>  {
> -     return vq && vq->desc   &&
> +     return vq && vq->desc && vq->avail && vq->used &&
>              vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
>              vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
>  }
> @@ -986,8 +992,29 @@ vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
>  }
> 
>  static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
>  {
> +     struct vhost_vring_addr *ra;
> +     uint64_t start, end;
> +
> +     start = imsg->iova;
> +     end = start + imsg->size;
> +
> +     ra = &vq->ring_addrs;
> +     if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> +             return 1;
> +     if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> +             return 1;
> +     if (ra->used_user_addr >= start && ra->used_user_addr < end)
> +             return 1;
> +
> +     return 0;
> +}
> +
> +static int
> +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> +{
> +     struct virtio_net *dev = *pdev;
>       struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
>       uint16_t i;
>       uint64_t vva;
> @@ -1003,6 +1030,9 @@ vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> 
>                       vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
>                                       imsg->size, imsg->perm);
> +
> +                     if (is_vring_iotlb_update(vq, imsg))
> +                             *pdev = dev = translate_ring_addresses(dev, i);
>               }
>               break;
>       case VHOST_IOTLB_INVALIDATE:
> @@ -1151,8 +1181,12 @@ vhost_user_msg_handler(int vid, int fd)
>       }
> 
>       ret = 0;
> -     RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> -             vhost_message_str[msg.request]);
> +     if (msg.request != VHOST_USER_IOTLB_MSG)
> +             RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +                     vhost_message_str[msg.request]);
> +     else
> +             RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
> +                     vhost_message_str[msg.request]);
> 
>       ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
>       if (ret < 0) {
> @@ -1254,7 +1288,7 @@ vhost_user_msg_handler(int vid, int fd)
>               break;
> 
>       case VHOST_USER_IOTLB_MSG:
> -             ret = vhost_user_iotlb_msg(dev, &msg);
> +             ret = vhost_user_iotlb_msg(&dev, &msg);
>               break;
> 
>       default:
> @@ -1263,12 +1297,6 @@ vhost_user_msg_handler(int vid, int fd)
> 
>       }
> 
> -     /*
> -      * The virtio_net struct might have been reallocated on a different
> -      * NUMA node, so dev pointer might no more be valid.
> -      */
> -     dev = get_device(vid);
> -
>       if (msg.flags & VHOST_USER_NEED_REPLY) {
>               msg.payload.u64 = !!ret;
>               msg.size = sizeof(msg.payload.u64);
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index cdfb6f957..b75c93cf1 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -329,13 +329,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>       if (unlikely(vq->enabled == 0))
>               return 0;
> 
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_lock(vq);
> +
> +     if (unlikely(vq->access_ok == 0)) {
> +             if (unlikely(vring_translate(dev, vq) < 0)) {
> +                     count = 0;
> +                     goto out;
> +             }
> +     }
> +
>       avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>       start_idx = vq->last_used_idx;
>       free_entries = avail_idx - start_idx;
>       count = RTE_MIN(count, free_entries);
>       count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
>       if (count == 0)
> -             return 0;
> +             goto out;
> 
>       LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
>               dev->vid, start_idx, start_idx + count);
> @@ -356,10 +366,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>       }
> 
>       rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_lock(vq);
> -
>       for (i = 0; i < count; i++) {
>               uint16_t desc_idx = desc_indexes[i];
>               int err;
> @@ -394,9 +400,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> 
>       do_data_copy_enqueue(dev, vq);
> 
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_unlock(vq);
> -
>       rte_smp_wmb();
> 
>       *(volatile uint16_t *)&vq->used->idx += count;
> @@ -412,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>                       && (vq->callfd >= 0))
>               eventfd_write(vq->callfd, (eventfd_t)1);
> +out:
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_unlock(vq);
> +
>       return count;
>  }
> 
> @@ -647,9 +654,16 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>       if (unlikely(vq->enabled == 0))
>               return 0;
> 
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_lock(vq);
> +
> +     if (unlikely(vq->access_ok == 0))
> +             if (unlikely(vring_translate(dev, vq) < 0))
> +                     goto out;
> +
>       count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>       if (count == 0)
> -             return 0;
> +             goto out;
> 
>       vq->batch_copy_nb_elems = 0;
> 
> @@ -657,10 +671,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
> 
>       vq->shadow_used_idx = 0;
>       avail_head = *((volatile uint16_t *)&vq->avail->idx);
> -
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_lock(vq);
> -
>       for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>               uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev-
> >vhost_hlen;
> 
> @@ -689,9 +699,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
> 
>       do_data_copy_enqueue(dev, vq);
> 
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_unlock(vq);
> -
>       if (likely(vq->shadow_used_idx)) {
>               flush_shadow_used_ring(dev, vq);
> 
> @@ -704,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>                       eventfd_write(vq->callfd, (eventfd_t)1);
>       }
> 
> +out:
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_unlock(vq);
> +
>       return pkt_idx;
>  }
> 
> @@ -1173,6 +1184,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
> 
>       vq->batch_copy_nb_elems = 0;
> 
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_lock(vq);
> +
> +     if (unlikely(vq->access_ok == 0))
> +             if (unlikely(vring_translate(dev, vq) < 0))
> +                     goto out;
> +
>       if (unlikely(dev->dequeue_zero_copy)) {
>               struct zcopy_mbuf *zmbuf, *next;
>               int nr_updated = 0;
> @@ -1262,10 +1280,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
> 
>       /* Prefetch descriptor index. */
>       rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_lock(vq);
> -
>       for (i = 0; i < count; i++) {
>               struct vring_desc *desc;
>               uint16_t sz, idx;
> @@ -1329,9 +1343,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>                       TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
>               }
>       }
> -     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -             vhost_user_iotlb_rd_unlock(vq);
> -
>       vq->last_avail_idx += i;
> 
>       if (likely(dev->dequeue_zero_copy == 0)) {
> @@ -1341,6 +1352,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>       }
> 
>  out:
> +     if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +             vhost_user_iotlb_rd_unlock(vq);
> +
>       if (unlikely(rarp_mbuf != NULL)) {
>               /*
>                * Inject it to the head of "pkts" array, so that switch's mac
> --
> 2.13.6
Hi Maxime,

I hit an issue with your patch set during v17.11 testing.
The test scenario is as follows:
1. Bind one NIC and launch testpmd on the host with a vhost-user vdev that has 2 queues:
usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
--vdev 'net_vhost0,iface=vhost-net,queues=2' -- -i --rxq=2 --txq=2 --nb-cores=2 \
--rss-ip
2. Launch QEMU with a vhost-user-backed virtio-net device that has 2 queues (a command along the lines of the sketch after this list).
3. In the VM, launch testpmd with virtio-pmd using only 1 queue:
x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 -- -i --txqflags=0xf01 \
--rxq=1 --txq=1 --rss-ip --nb-cores=1
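
For reference, step 2 can be done with a QEMU command along these lines
(disk image path, memory size and vCPU count are placeholders; the socket
path matches iface=vhost-net from step 1):
qemu-system-x86_64 -enable-kvm -cpu host -smp 3 -m 4096 \
-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \
-numa node,memdev=mem -mem-prealloc \
-chardev socket,id=char0,path=./vhost-net \
-netdev type=vhost-user,id=net0,chardev=char0,queues=2 \
-device virtio-net-pci,netdev=net0,mq=on,vectors=6 \
-drive file=/path/to/vm.img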

First, with commit 09927b5249694bad1c094d3068124673722e6b8f
("vhost: translate ring addresses when IOMMU enabled"), there is no
traffic in the PVP test, although the link status is still reported as up
on the vhost-user side.

Second, with commit eefac9536a901a1f0bb52aa3b6fec8f375f09190
("vhost: postpone device creation until rings are mapped"), the link
status goes down on the vhost-user side.

Could you take a look on your side? Thanks.

BRs
Lei
