> -----Original Message-----
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Thursday, October 5, 2017 4:36 PM
> To: dev@dpdk.org; Horton, Remy <remy.hor...@intel.com>; Bie, Tiwei <tiwei....@intel.com>; y...@fridaylinux.org
> Cc: m...@redhat.com; jfrei...@redhat.com; vkapl...@redhat.com; jasow...@redhat.com; Maxime Coquelin <maxime.coque...@redhat.com>
> Subject: [dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device creation until rings are mapped
>
> Translating the start addresses of the rings is not enough, we need to
> be sure the whole ring is made available by the guest.
>
> It depends on the size of the rings, which is not known on
> SET_VRING_ADDR reception. Furthermore, we need to be safe against
> vring page invalidations.
>
> This patch introduces a new access_ok flag per virtqueue, which is set
> when all the rings are mapped, and cleared as soon as a page used by a
> ring is invalidated. The invalidation part is implemented in a
> following patch.
>
> Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com>
> ---
>  lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++------------
>  lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++----------------
>  4 files changed, 121 insertions(+), 40 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 0e2ad3322..ef54835a6 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
>  	rte_free(dev);
>  }
>
> +int
> +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> +	uint64_t size;
> +
> +	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
> +		goto out;
> +
> +	size = sizeof(struct vring_desc) * vq->size;
> +	vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
> +						vq->ring_addrs.desc_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->desc)
> +		return -1;
> +
> +	size = sizeof(struct vring_avail);
> +	size += sizeof(uint16_t) * vq->size;
> +	vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
> +						vq->ring_addrs.avail_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->avail)
> +		return -1;
> +
> +	size = sizeof(struct vring_used);
> +	size += sizeof(struct vring_used_elem) * vq->size;
> +	vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
> +						vq->ring_addrs.used_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->used)
> +		return -1;
> +
> +out:
> +	vq->access_ok = 1;
> +
> +	return 0;
> +}
> +
>  static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 903da5db5..b3fe6bb8e 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -113,6 +113,7 @@ struct vhost_virtqueue {
>  	/* Currently unused as polling mode is enabled */
>  	int		kickfd;
>  	int		enabled;
> +	int		access_ok;
>
>  	/* Physical address of used ring, for logging */
>  	uint64_t	log_guest_addr;
> @@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>
>  uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			uint64_t iova, uint64_t size, uint8_t perm);
> +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>
>  static __rte_always_inline uint64_t
>  vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 90b209764..dd6562fd8 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
>  	 */
>  	memcpy(&vq->ring_addrs, addr, sizeof(*addr));
>
> +	vq->desc = NULL;
> +	vq->avail = NULL;
> +	vq->used = NULL;
> +
> +	vq->access_ok = 0;
> +
>  	return 0;
>  }
>
> @@ -407,10 +413,10 @@ static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
>  	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->desc_user_addr, sizeof(struct vring_desc));
>  	if (vq->desc == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find desc ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
>
>  	dev = numa_realloc(dev, vq_index);
> @@ -419,19 +425,19 @@ static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
>  	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->avail_user_addr, sizeof(struct vring_avail));
>  	if (vq->avail == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find avail ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
>
>  	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->used_user_addr, sizeof(struct vring_used));
>  	if (vq->used == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find used ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
>
>  	if (vq->last_used_idx != vq->used->idx) {
> @@ -677,7 +683,7 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  static int
>  vq_is_ready(struct vhost_virtqueue *vq)
>  {
> -	return vq && vq->desc &&
> +	return vq && vq->desc && vq->avail && vq->used &&
>  	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
>  	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
>  }
> @@ -986,8 +992,29 @@ vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
>  }
>
>  static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
>  {
> +	struct vhost_vring_addr *ra;
> +	uint64_t start, end;
> +
> +	start = imsg->iova;
> +	end = start + imsg->size;
> +
> +	ra = &vq->ring_addrs;
> +	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> +		return 1;
> +	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> +		return 1;
> +	if (ra->used_user_addr >= start && ra->used_user_addr < end)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int
> +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> +{
> +	struct virtio_net *dev = *pdev;
>  	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
>  	uint16_t i;
>  	uint64_t vva;
> @@ -1003,6 +1030,9 @@ vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
>
>  			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
>  					imsg->size, imsg->perm);
> +
> +			if (is_vring_iotlb_update(vq, imsg))
> +				*pdev = dev = translate_ring_addresses(dev, i);
>  		}
>  		break;
>  	case VHOST_IOTLB_INVALIDATE:
> @@ -1151,8 +1181,12 @@ vhost_user_msg_handler(int vid, int fd)
>  	}
>
>  	ret = 0;
> -	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> -		vhost_message_str[msg.request]);
> +	if (msg.request != VHOST_USER_IOTLB_MSG)
> +		RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
> +	else
> +		RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
>
>  	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
>  	if (ret < 0) {
> @@ -1254,7 +1288,7 @@ vhost_user_msg_handler(int vid, int fd)
>  		break;
>
>  	case VHOST_USER_IOTLB_MSG:
> -		ret = vhost_user_iotlb_msg(dev, &msg);
> +		ret = vhost_user_iotlb_msg(&dev, &msg);
>  		break;
>
>  	default:
> @@ -1263,12 +1297,6 @@ vhost_user_msg_handler(int vid, int fd)
>
>  	}
>
> -	/*
> -	 * The virtio_net struct might have been reallocated on a different
> -	 * NUMA node, so dev pointer might no more be valid.
> -	 */
> -	dev = get_device(vid);
> -
>  	if (msg.flags & VHOST_USER_NEED_REPLY) {
>  		msg.payload.u64 = !!ret;
>  		msg.size = sizeof(msg.payload.u64);
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index cdfb6f957..b75c93cf1 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -329,13 +329,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
>
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0)) {
> +		if (unlikely(vring_translate(dev, vq) < 0)) {
> +			count = 0;
> +			goto out;
> +		}
> +	}
> +
>  	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>  	start_idx = vq->last_used_idx;
>  	free_entries = avail_idx - start_idx;
>  	count = RTE_MIN(count, free_entries);
>  	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
>  	if (count == 0)
> -		return 0;
> +		goto out;
>
>  	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
>  		dev->vid, start_idx, start_idx + count);
> @@ -356,10 +366,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	}
>
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		uint16_t desc_idx = desc_indexes[i];
>  		int err;
> @@ -394,9 +400,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>
>  	do_data_copy_enqueue(dev, vq);
>
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	rte_smp_wmb();
>
>  	*(volatile uint16_t *)&vq->used->idx += count;
> @@ -412,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>  			&& (vq->callfd >= 0))
>  		eventfd_write(vq->callfd, (eventfd_t)1);
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return count;
>  }
>
> @@ -647,9 +654,16 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
>
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  	if (count == 0)
> -		return 0;
> +		goto out;
>
>  	vq->batch_copy_nb_elems = 0;
>
> @@ -657,10 +671,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>
>  	vq->shadow_used_idx = 0;
>  	avail_head = *((volatile uint16_t *)&vq->avail->idx);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>  		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
>
> @@ -689,9 +699,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>
>  	do_data_copy_enqueue(dev, vq);
>
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	if (likely(vq->shadow_used_idx)) {
>  		flush_shadow_used_ring(dev, vq);
>
> @@ -704,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>  			eventfd_write(vq->callfd, (eventfd_t)1);
>  	}
>
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return pkt_idx;
>  }
>
> @@ -1173,6 +1184,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>
>  	vq->batch_copy_nb_elems = 0;
>
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	if (unlikely(dev->dequeue_zero_copy)) {
>  		struct zcopy_mbuf *zmbuf, *next;
>  		int nr_updated = 0;
> @@ -1262,10 +1280,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>
>  	/* Prefetch descriptor index. */
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		struct vring_desc *desc;
>  		uint16_t sz, idx;
> @@ -1329,9 +1343,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>  			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
>  		}
>  	}
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	vq->last_avail_idx += i;
>
>  	if (likely(dev->dequeue_zero_copy == 0)) {
> @@ -1341,6 +1352,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>  	}
>
>  out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	if (unlikely(rarp_mbuf != NULL)) {
>  		/*
>  		 * Inject it to the head of "pkts" array, so that switch's mac
> --
> 2.13.6
Hi Maxime,

I met one issue with your patch set during the v17.11 test. The test
scenario is as follows:

1. Bind one NIC, and use testpmd to set up vhost-user with 2 queues:

   usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
   ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
       --vdev 'net_vhost0,iface=vhost-net,queues=2' -- -i --rxq=2 --txq=2 \
       --nb-cores=2 --rss-ip

2. Launch qemu with a virtio device that has 2 queues. (A representative
   command line is sketched after my signature; it is illustrative only,
   with placeholder paths and sizes.)

3. In the VM, launch testpmd with virtio-pmd using only 1 queue:

   x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 -- -i --txqflags=0xf01 \
       --rxq=1 --txq=1 --rss-ip --nb-cores=1

Two commits misbehave in this setup:

First, commit 09927b5249694bad1c094d3068124673722e6b8f ("vhost: translate
ring addresses when IOMMU enabled") causes no traffic in the PVP test,
although the link status is still "up" in vhost-user.

Second, commit eefac9536a901a1f0bb52aa3b6fec8f375f09190 ("vhost: postpone
device creation until rings are mapped") causes the link status to go
"down" in vhost-user.

Could you have a check at your side? Thanks.

BRs
Lei
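---
Appendix: for step 2, a typical qemu vhost-user invocation with 2 queue
pairs looks like the sketch below. This is not the exact command used in
the test: the image name, memory size, and CPU count are placeholders.
Only the chardev socket path (matching testpmd's iface=vhost-net above)
and queues=2 matter for reproducing the topology; vhost-user also requires
the guest RAM to be a shared, hugepage-backed memory backend.

   # Illustrative sketch only; vm0.img and the memory/CPU sizing are
   # placeholders. vectors=6 follows the usual 2*queues+2 MSI-X sizing
   # for virtio-net multiqueue.
   qemu-system-x86_64 -enable-kvm -cpu host -smp 3 -m 4096 \
       -object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \
       -numa node,memdev=mem -mem-prealloc \
       -chardev socket,id=char0,path=./vhost-net \
       -netdev type=vhost-user,id=net0,chardev=char0,queues=2 \
       -device virtio-net-pci,netdev=net0,mq=on,vectors=6 \
       -drive file=vm0.img

The guest then sees a virtio-net device with 2 queue pairs, of which the
testpmd in step 3 uses only one (--rxq=1 --txq=1); that asymmetric
configuration is the one under which the problems above show up.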