On Wed, Nov 28, 2018 at 05:46:00PM +0800, Xiao Wang wrote:
[...]
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the available ring from guest to mediate ring, help to
> + * check desc validity to protect against malicious guest driver.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced available entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the used ring from mediate ring to guest, log dirty
> + * page for each Rx buffer used.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced used entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring);

Above APIs are split ring specific. We also need to take
packed ring into consideration.

>  #endif /* _RTE_VDPA_H_ */
[...]
> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
> index e7d849ee0..e41117776 100644
> --- a/lib/librte_vhost/vdpa.c
> +++ b/lib/librte_vhost/vdpa.c
> @@ -122,3 +122,176 @@ rte_vdpa_get_device_num(void)
>  {
>       return vdpa_device_num;
>  }
> +
> +static int
> +invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +             uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
> +{
> +     uint64_t desc_addr, desc_chunck_len;
> +
> +     while (desc_len) {
> +             desc_chunck_len = desc_len;
> +             desc_addr = vhost_iova_to_vva(dev, vq,
> +                             desc_iova,
> +                             &desc_chunck_len,
> +                             perm);
> +
> +             if (!desc_addr)
> +                     return -1;
> +
> +             desc_len -= desc_chunck_len;
> +             desc_iova += desc_chunck_len;
> +     }
> +
> +     return 0;
> +}
> +
> +int
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring)
> +{
> +     struct virtio_net *dev = get_device(vid);
> +     uint16_t idx, idx_m, desc_id;
> +     struct vring_desc desc;
> +     struct vhost_virtqueue *vq;
> +     struct vring_desc *desc_ring;
> +     struct vring_desc *idesc = NULL;
> +     uint64_t dlen;
> +     int ret;
> +
> +     if (!dev)
> +             return -1;
> +
> +     vq = dev->virtqueue[qid];

Better to also validate qid.

> +     idx = vq->avail->idx;
> +     idx_m = m_vring->avail->idx;
> +     ret = idx - idx_m;

Need to cast (idx - idx_m) to uint16_t, otherwise the result is
wrong once the 16-bit ring index wraps around.

> +
> +     while (idx_m != idx) {
> +             /* avail entry copy */
> +             desc_id = vq->avail->ring[idx_m % vq->size];

idx_m & (vq->size - 1) should be faster, since vring sizes are
always powers of two.

> +             m_vring->avail->ring[idx_m % vq->size] = desc_id;
> +             desc_ring = vq->desc;
> +
> +             if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +                     dlen = vq->desc[desc_id].len;
> +                     desc_ring = (struct vring_desc *)(uintptr_t)
> +                     vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

The indent needs to be fixed.

> +                                             &dlen,
> +                                             VHOST_ACCESS_RO);
> +                     if (unlikely(!desc_ring))
> +                             return -1;
> +
> +                     if (unlikely(dlen < vq->desc[idx].len)) {
> +                             idesc = alloc_copy_ind_table(dev, vq,
> +                                     vq->desc[idx].addr, vq->desc[idx].len);
> +                             if (unlikely(!idesc))
> +                                     return -1;
> +
> +                             desc_ring = idesc;
> +                     }
> +
> +                     desc_id = 0;
> +             }
> +
> +             /* check if the buf addr is within the guest memory */
> +             do {
> +                     desc = desc_ring[desc_id];
> +                     if (invalid_desc_check(dev, vq, desc.addr, desc.len,
> +                                             VHOST_ACCESS_RW))

The return value should be checked with < 0; otherwise the
function should return bool instead.

We may just have RO access.

> +                             return -1;

The memory allocated for idesc, if any, will leak in this case.

> +                     desc_id = desc.next;
> +             } while (desc.flags & VRING_DESC_F_NEXT);
> +
> +             if (unlikely(!!idesc)) {

The !! isn't needed.

> +                     free_ind_table(idesc);
> +                     idesc = NULL;
> +             }
> +
> +             idx_m++;
> +     }
> +

A write barrier is needed here, to make sure the copied avail
entries are visible before the avail index update.

> +     m_vring->avail->idx = idx;
> +
> +     if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +             vhost_avail_event(vq) = vq->avail->idx;

Need to use idx instead of vq->avail->idx, which may have
already been changed by the driver.

> +
> +     return ret;
> +}
> +
> +int
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring)
> +{
> +     struct virtio_net *dev = get_device(vid);
> +     uint16_t idx, idx_m, desc_id;
> +     struct vhost_virtqueue *vq;
> +     struct vring_desc desc;
> +     struct vring_desc *desc_ring;
> +     struct vring_desc *idesc = NULL;
> +     uint64_t dlen;
> +     int ret;
> +
> +     if (!dev)
> +             return -1;
> +
> +     vq = dev->virtqueue[qid];

Better to also validate qid.

> +     idx = vq->used->idx;
> +     idx_m = m_vring->used->idx;
> +     ret = idx_m - idx;

Need to cast (idx_m - idx) to uint16_t, otherwise the result is
wrong once the 16-bit ring index wraps around.

> +
> +     while (idx != idx_m) {
> +             /* copy used entry, used ring logging is not covered here */

The used ring logging has been covered here by the following call
to vhost_log_used_vring() after used ring is changed.

> +             vq->used->ring[idx % vq->size] =

idx & (vq->size - 1) should be faster, since vring sizes are
always powers of two.

> +                     m_vring->used->ring[idx % vq->size];
> +
> +             /* dirty page logging for used ring */
> +             vhost_log_used_vring(dev, vq,
> +                     offsetof(struct vring_used, ring[idx % vq->size]),
> +                     sizeof(struct vring_used_elem));
> +
> +             desc_id = vq->used->ring[idx % vq->size].id;
> +             desc_ring = vq->desc;
> +
> +             if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +                     dlen = vq->desc[desc_id].len;
> +                     desc_ring = (struct vring_desc *)(uintptr_t)
> +                     vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

The indent needs to be fixed.

> +                                             &dlen,
> +                                             VHOST_ACCESS_RO);
> +                     if (unlikely(!desc_ring))
> +                             return -1;
> +
> +                     if (unlikely(dlen < vq->desc[idx].len)) {
> +                             idesc = alloc_copy_ind_table(dev, vq,
> +                                     vq->desc[idx].addr, vq->desc[idx].len);
> +                             if (unlikely(!idesc))
> +                                     return -1;
> +
> +                             desc_ring = idesc;
> +                     }
> +
> +                     desc_id = 0;
> +             }
> +
> +             /* dirty page logging for Rx buffer */

Rx is for net; this API isn't net-specific.

> +             do {
> +                     desc = desc_ring[desc_id];
> +                     if (desc.flags & VRING_DESC_F_WRITE)
> +                             vhost_log_write(dev, desc.addr, desc.len);
> +                     desc_id = desc.next;
> +             } while (desc.flags & VRING_DESC_F_NEXT);
> +
> +             if (unlikely(!!idesc)) {

The !! isn't needed.

> +                     free_ind_table(idesc);
> +                     idesc = NULL;
> +             }
> +
> +             idx++;
> +     }
> +

A write barrier is needed here, to make sure the copied used
entries are visible before the used index update.

> +     vq->used->idx = idx_m;
> +
> +     if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +             vring_used_event(m_vring) = m_vring->used->idx;
> +
> +     return ret;
> +}
[...]

Reply via email to