From: Maxime Coquelin
> On 1/29/20 11:09 AM, Matan Azrad wrote:
> > The HW virtq object represents an emulated context for a VIRTIO_NET
> > virtqueue which is created and managed by a VIRTIO_NET driver as
> > defined in the VIRTIO specification.
> >
> > Add support to prepare and release all the basic HW resources needed
> > for the user virtqs emulation according to the rte_vhost configuration.
> >
> > This patch prepares the basic configurations needed by DevX commands
> > to create a virtq.
> >
> > Add new file mlx5_vdpa_virtq.c to manage virtq operations.
> >
> > Signed-off-by: Matan Azrad <ma...@mellanox.com>
> > Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> > ---
> > drivers/vdpa/mlx5/Makefile | 1 +
> > drivers/vdpa/mlx5/meson.build | 1 +
> > drivers/vdpa/mlx5/mlx5_vdpa.c | 1 +
> > drivers/vdpa/mlx5/mlx5_vdpa.h | 36 ++++++
> > drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 212 ++++++++++++++++++++++++++++++++++++
> > 5 files changed, 251 insertions(+)
> > create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> >
> > diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
> > index 7f13756..353e262 100644
> > --- a/drivers/vdpa/mlx5/Makefile
> > +++ b/drivers/vdpa/mlx5/Makefile
> > @@ -10,6 +10,7 @@ LIB = librte_pmd_mlx5_vdpa.a
> > SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa.c
> > SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_mem.c
> > SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_event.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_virtq.c
> >
> > # Basic CFLAGS.
> > CFLAGS += -O3
> > diff --git a/drivers/vdpa/mlx5/meson.build b/drivers/vdpa/mlx5/meson.build
> > index c609f7c..e017f95 100644
> > --- a/drivers/vdpa/mlx5/meson.build
> > +++ b/drivers/vdpa/mlx5/meson.build
> > @@ -14,6 +14,7 @@ sources = files(
> > 'mlx5_vdpa.c',
> > 'mlx5_vdpa_mem.c',
> > 'mlx5_vdpa_event.c',
> > + 'mlx5_vdpa_virtq.c',
> > )
> > cflags_options = [
> > '-std=c11',
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > index c67f93d..4d30b35 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > @@ -229,6 +229,7 @@
> > goto error;
> > }
> > SLIST_INIT(&priv->mr_list);
> > + SLIST_INIT(&priv->virtq_list);
> > pthread_mutex_lock(&priv_list_lock);
> > TAILQ_INSERT_TAIL(&priv_list, priv, next);
> > pthread_mutex_unlock(&priv_list_lock);
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > index 30030b7..a7e2185 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > @@ -53,6 +53,19 @@ struct mlx5_vdpa_query_mr {
> > int is_indirect;
> > };
> >
> > +struct mlx5_vdpa_virtq {
> > + SLIST_ENTRY(mlx5_vdpa_virtq) next;
> > + uint16_t index;
> > + uint16_t vq_size;
> > + struct mlx5_devx_obj *virtq;
> > + struct mlx5_vdpa_event_qp eqp;
> > + struct {
> > + struct mlx5dv_devx_umem *obj;
> > + void *buf;
> > + uint32_t size;
> > + } umems[3];
> > +};
> > +
> > struct mlx5_vdpa_priv {
> > TAILQ_ENTRY(mlx5_vdpa_priv) next;
> > int id; /* vDPA device id. */
> > @@ -69,6 +82,10 @@ struct mlx5_vdpa_priv {
> > struct mlx5dv_devx_event_channel *eventc;
> > struct mlx5dv_devx_uar *uar;
> > struct rte_intr_handle intr_handle;
> > + struct mlx5_devx_obj *td;
> > + struct mlx5_devx_obj *tis;
> > + uint16_t nr_virtqs;
> > + SLIST_HEAD(virtq_list, mlx5_vdpa_virtq) virtq_list;
> > SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list;
> > };
> >
> > @@ -146,4 +163,23 @@ int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
> > */
> > void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
> >
> > +/**
> > + * Release all the virtqs and their related resources.
> > + *
> > + * @param[in] priv
> > + * The vdpa driver private structure.
> > + */
> > +void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
> > +
> > +/**
> > + * Create all the HW virtqs resources and all their related resources.
> > + *
> > + * @param[in] priv
> > + * The vdpa driver private structure.
> > + *
> > + * @return
> > + * 0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +int mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv);
> > +
> > #endif /* RTE_PMD_MLX5_VDPA_H_ */
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > new file mode 100644
> > index 0000000..781bccf
> > --- /dev/null
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > @@ -0,0 +1,212 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 Mellanox Technologies, Ltd
> > + */
> > +#include <string.h>
> > +
> > +#include <rte_malloc.h>
> > +#include <rte_errno.h>
> > +
> > +#include <mlx5_common.h>
> > +
> > +#include "mlx5_vdpa_utils.h"
> > +#include "mlx5_vdpa.h"
> > +
> > +
> > +static int
> > +mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
> > +{
> > + int i;
> > +
> > + if (virtq->virtq) {
> > + claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
> > + virtq->virtq = NULL;
> > + }
> > + for (i = 0; i < 3; ++i) {
> > + if (virtq->umems[i].obj)
> > + claim_zero(mlx5_glue->devx_umem_dereg
> > + (virtq->umems[i].obj));
> > + if (virtq->umems[i].buf)
> > + rte_free(virtq->umems[i].buf);
> > + }
> > + memset(&virtq->umems, 0, sizeof(virtq->umems));
> > + if (virtq->eqp.fw_qp)
> > + mlx5_vdpa_event_qp_destroy(&virtq->eqp);
> > + return 0;
> > +}
> > +
> > +void
> > +mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
> > +{
> > + struct mlx5_vdpa_virtq *entry;
> > + struct mlx5_vdpa_virtq *next;
> > +
> > + entry = SLIST_FIRST(&priv->virtq_list);
> > + while (entry) {
> > + next = SLIST_NEXT(entry, next);
> > + mlx5_vdpa_virtq_unset(entry);
> > + SLIST_REMOVE(&priv->virtq_list, entry, mlx5_vdpa_virtq, next);
> > + rte_free(entry);
> > + entry = next;
> > + }
> > + SLIST_INIT(&priv->virtq_list);
> > + if (priv->tis) {
> > + claim_zero(mlx5_devx_cmd_destroy(priv->tis));
> > + priv->tis = NULL;
> > + }
> > + if (priv->td) {
> > + claim_zero(mlx5_devx_cmd_destroy(priv->td));
> > + priv->td = NULL;
> > + }
> > +}
> > +
> > +static uint64_t
> > +mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
> > +{
> > + struct rte_vhost_mem_region *reg;
> > + uint32_t i;
> > + uint64_t gpa = 0;
> > +
> > + for (i = 0; i < mem->nregions; i++) {
> > + reg = &mem->regions[i];
> > + if (hva >= reg->host_user_addr &&
> > + hva < reg->host_user_addr + reg->size) {
> > + gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
> > + break;
> > + }
> > + }
> > + return gpa;
> > +}
>
> I think you may need a third parameter for the size to map.
> Otherwise, you would be vulnerable to CVE-2018-1059.
Yes, I just read it and understand that the virtio descriptor queue/packet
data may be non-contiguous in guest physical memory, and may even be unmapped
as far as the rte_vhost library knows here. Is that right?
Don't you think rte_vhost should validate it, at least that all the queue
memory is mapped?
Can you explain more about why it may happen? A QEMU bug?
In any case, from the Mellanox perspective, at least for the packet data, it
is OK: if the guest tries to access a physical address which is not mapped,
the packet will be ignored by the HW.
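
For reference, here is a minimal sketch (not part of this patch) of the
size-checked translation Maxime suggests; the function name and the extra
"len" parameter are hypothetical:

#include <stdint.h>
#include <rte_vhost.h>

/*
 * Hypothetical variant: translate an HVA to a GPA only when the whole
 * [hva, hva + len) range fits inside one registered guest memory region,
 * addressing the CVE-2018-1059 concern. Returns 0 when the range is not
 * fully covered by a single region.
 */
static uint64_t
mlx5_vdpa_hva_to_gpa_len(struct rte_vhost_memory *mem, uint64_t hva,
			 uint64_t len)
{
	struct rte_vhost_mem_region *reg;
	uint32_t i;

	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		if (hva >= reg->host_user_addr &&
		    hva + len <= reg->host_user_addr + reg->size)
			return hva - reg->host_user_addr +
			       reg->guest_phys_addr;
	}
	return 0;
}

The caller would then pass the ring length in bytes (derived from vq.size)
instead of only the start address of each ring.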
> > +
> > +static int
> > +mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv,
> > + struct mlx5_vdpa_virtq *virtq, int index)
> > +{
> > + struct rte_vhost_vring vq;
> > + struct mlx5_devx_virtq_attr attr = {0};
> > + uint64_t gpa;
> > + int ret;
> > + int i;
> > + uint16_t last_avail_idx;
> > + uint16_t last_used_idx;
> > +
> > + ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
> > + if (ret)
> > + return -1;
> > + virtq->index = index;
> > + virtq->vq_size = vq.size;
> > + /*
> > + * No need for event QP creation when the guest is in poll mode or
> > + * when the capability allows it.
> > + */
> > + attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode &
> > + (1 << MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
> > + MLX5_VIRTQ_EVENT_MODE_QP :
> > + MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
> > + if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
> > + ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
> > + &virtq->eqp);
> > + if (ret) {
> > + DRV_LOG(ERR, "Failed to create event QPs for virtq
> %d.",
> > + index);
> > + return -1;
> > + }
> > + attr.qp_id = virtq->eqp.fw_qp->id;
> > + } else {
> > + DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode,
> no"
> > + " need event QPs and event mechanism.", index);
> > + }
> > + /* Setup 3 UMEMs for each virtq. */
> > + for (i = 0; i < 3; ++i) {
> > + virtq->umems[i].size = priv->caps.umems[i].a * vq.size +
> > + priv->caps.umems[i].b;
> > + virtq->umems[i].buf = rte_zmalloc(__func__,
> > + virtq->umems[i].size, 4096);
> > + if (!virtq->umems[i].buf) {
> > + DRV_LOG(ERR, "Cannot allocate umem %d memory
> for virtq"
> > + " %u.", i, index);
> > + goto error;
> > + }
> > + virtq->umems[i].obj = mlx5_glue->devx_umem_reg(priv->ctx,
> > + virtq->umems[i].buf,
> > + virtq->umems[i].size,
> > + IBV_ACCESS_LOCAL_WRITE);
> > + if (!virtq->umems[i].obj) {
> > + DRV_LOG(ERR, "Failed to register umem %d for virtq
> %u.",
> > + i, index);
> > + goto error;
> > + }
> > + attr.umems[i].id = virtq->umems[i].obj->umem_id;
> > + attr.umems[i].offset = 0;
> > + attr.umems[i].size = virtq->umems[i].size;
> > + }
> > + gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.desc);
> > + if (!gpa) {
> > + DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
> > + goto error;
> > + }
> > + attr.desc_addr = gpa;
> > + gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.used);
> > + if (!gpa) {
> > + DRV_LOG(ERR, "Fail to get GPA for used ring.");
> > + goto error;
> > + }
> > + attr.used_addr = gpa;
> > + gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.avail);
> > + if (!gpa) {
> > + DRV_LOG(ERR, "Fail to get GPA for available ring.");
> > + goto error;
> > + }
> > + attr.available_addr = gpa;
> > + rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
> > + &last_used_idx);
> > + DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d
> for "
> > + "virtq %d.", priv->vid, last_avail_idx, last_used_idx, index);
> > + attr.hw_available_index = last_avail_idx;
> > + attr.hw_used_index = last_used_idx;
> > + attr.q_size = vq.size;
> > + attr.mkey = priv->gpa_mkey_index;
> > + attr.tis_id = priv->tis->id;
> > + attr.queue_index = index;
> > + virtq->virtq = mlx5_devx_cmd_create_virtq(priv->ctx, &attr);
> > + if (!virtq->virtq)
> > + goto error;
> > + return 0;
> > +error:
> > + mlx5_vdpa_virtq_unset(virtq);
> > + return -1;
> > +}
> > +
> > +int
> > +mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
> > +{
> > + struct mlx5_devx_tis_attr tis_attr = {0};
> > + struct mlx5_vdpa_virtq *virtq;
> > + uint32_t i;
> > + uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
> > +
> > + priv->td = mlx5_devx_cmd_create_td(priv->ctx);
> > + if (!priv->td) {
> > + DRV_LOG(ERR, "Failed to create transport domain.");
> > + return -rte_errno;
> > + }
> > + tis_attr.transport_domain = priv->td->id;
> > + priv->tis = mlx5_devx_cmd_create_tis(priv->ctx, &tis_attr);
> > + if (!priv->tis) {
> > + DRV_LOG(ERR, "Failed to create TIS.");
> > + goto error;
> > + }
> > + for (i = 0; i < nr_vring; i++) {
> > + virtq = rte_zmalloc(__func__, sizeof(*virtq), 0);
> > + if (!virtq || mlx5_vdpa_virtq_setup(priv, virtq, i)) {
> > + if (virtq)
> > + rte_free(virtq);
> > + goto error;
> > + }
> > + SLIST_INSERT_HEAD(&priv->virtq_list, virtq, next);
> > + }
> > + priv->nr_virtqs = nr_vring;
> > + return 0;
> > +error:
> > + mlx5_vdpa_virtqs_release(priv);
> > + return -1;
> > +}
> >