On 1/31/20 8:34 AM, Matan Azrad wrote:
>
>
> From: Maxime Coquelin
>> On 1/29/20 11:09 AM, Matan Azrad wrote:
>>> The HW virtq object represents an emulated context for a VIRTIO_NET
>>> virtqueue which was created and managed by a VIRTIO_NET driver as
>>> defined in VIRTIO Specification.
>>>
>>> Add support to prepare and release all the basic HW resources needed
>>> the user virtqs emulation according to the rte_vhost configurations.
>>>
>>> This patch prepares the basic configurations needed by DevX commands
>>> to create a virtq.
>>>
>>> Add new file mlx5_vdpa_virtq.c to manage virtq operations.
>>>
>>> Signed-off-by: Matan Azrad <ma...@mellanox.com>
>>> Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
>>> ---
>>> drivers/vdpa/mlx5/Makefile | 1 +
>>> drivers/vdpa/mlx5/meson.build | 1 +
>>> drivers/vdpa/mlx5/mlx5_vdpa.c | 1 +
>>> drivers/vdpa/mlx5/mlx5_vdpa.h | 36 ++++++
>>> drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 212
>>> ++++++++++++++++++++++++++++++++++++
>>> 5 files changed, 251 insertions(+)
>>> create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
>>>
>>> diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
>>> index 7f13756..353e262 100644
>>> --- a/drivers/vdpa/mlx5/Makefile
>>> +++ b/drivers/vdpa/mlx5/Makefile
>>> @@ -10,6 +10,7 @@ LIB = librte_pmd_mlx5_vdpa.a
>>> SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa.c
>>> SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_mem.c
>>> SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_event.c
>>> +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_virtq.c
>>>
>>> # Basic CFLAGS.
>>> CFLAGS += -O3
>>> diff --git a/drivers/vdpa/mlx5/meson.build
>>> b/drivers/vdpa/mlx5/meson.build index c609f7c..e017f95 100644
>>> --- a/drivers/vdpa/mlx5/meson.build
>>> +++ b/drivers/vdpa/mlx5/meson.build
>>> @@ -14,6 +14,7 @@ sources = files(
>>> 'mlx5_vdpa.c',
>>> 'mlx5_vdpa_mem.c',
>>> 'mlx5_vdpa_event.c',
>>> + 'mlx5_vdpa_virtq.c',
>>> )
>>> cflags_options = [
>>> '-std=c11',
>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c
>>> b/drivers/vdpa/mlx5/mlx5_vdpa.c index c67f93d..4d30b35 100644
>>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
>>> @@ -229,6 +229,7 @@
>>> goto error;
>>> }
>>> SLIST_INIT(&priv->mr_list);
>>> + SLIST_INIT(&priv->virtq_list);
>>> pthread_mutex_lock(&priv_list_lock);
>>> TAILQ_INSERT_TAIL(&priv_list, priv, next);
>>> pthread_mutex_unlock(&priv_list_lock);
>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h
>>> b/drivers/vdpa/mlx5/mlx5_vdpa.h index 30030b7..a7e2185 100644
>>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
>>> @@ -53,6 +53,19 @@ struct mlx5_vdpa_query_mr {
>>> int is_indirect;
>>> };
>>>
>>> +struct mlx5_vdpa_virtq {
>>> + SLIST_ENTRY(mlx5_vdpa_virtq) next;
>>> + uint16_t index;
>>> + uint16_t vq_size;
>>> + struct mlx5_devx_obj *virtq;
>>> + struct mlx5_vdpa_event_qp eqp;
>>> + struct {
>>> + struct mlx5dv_devx_umem *obj;
>>> + void *buf;
>>> + uint32_t size;
>>> + } umems[3];
>>> +};
>>> +
>>> struct mlx5_vdpa_priv {
>>> TAILQ_ENTRY(mlx5_vdpa_priv) next;
>>> int id; /* vDPA device id. */
>>> @@ -69,6 +82,10 @@ struct mlx5_vdpa_priv {
>>> struct mlx5dv_devx_event_channel *eventc;
>>> struct mlx5dv_devx_uar *uar;
>>> struct rte_intr_handle intr_handle;
>>> + struct mlx5_devx_obj *td;
>>> + struct mlx5_devx_obj *tis;
>>> + uint16_t nr_virtqs;
>>> + SLIST_HEAD(virtq_list, mlx5_vdpa_virtq) virtq_list;
>>> SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list; };
>>>
>>> @@ -146,4 +163,23 @@ int mlx5_vdpa_event_qp_create(struct
>> mlx5_vdpa_priv *priv, uint16_t desc_n,
>>> */
>>> void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
>>>
>>> +/**
>>> + * Release a virtq and all its related resources.
>>> + *
>>> + * @param[in] priv
>>> + * The vdpa driver private structure.
>>> + */
>>> +void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
>>> +
>>> +/**
>>> + * Create all the HW virtqs resources and all their related resources.
>>> + *
>>> + * @param[in] priv
>>> + * The vdpa driver private structure.
>>> + *
>>> + * @return
>>> + * 0 on success, a negative errno value otherwise and rte_errno is set.
>>> + */
>>> +int mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv);
>>> +
>>> #endif /* RTE_PMD_MLX5_VDPA_H_ */
>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
>>> b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
>>> new file mode 100644
>>> index 0000000..781bccf
>>> --- /dev/null
>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
>>> @@ -0,0 +1,212 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright 2019 Mellanox Technologies, Ltd */ #include <string.h>
>>> +
>>> +#include <rte_malloc.h>
>>> +#include <rte_errno.h>
>>> +
>>> +#include <mlx5_common.h>
>>> +
>>> +#include "mlx5_vdpa_utils.h"
>>> +#include "mlx5_vdpa.h"
>>> +
>>> +
>>> +static int
>>> +mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq) {
>>> + int i;
>>> +
>>> + if (virtq->virtq) {
>>> + claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
>>> + virtq->virtq = NULL;
>>> + }
>>> + for (i = 0; i < 3; ++i) {
>>> + if (virtq->umems[i].obj)
>>> + claim_zero(mlx5_glue->devx_umem_dereg
>>> + (virtq-
>>> umems[i].obj));
>>> + if (virtq->umems[i].buf)
>>> + rte_free(virtq->umems[i].buf);
>>> + }
>>> + memset(&virtq->umems, 0, sizeof(virtq->umems));
>>> + if (virtq->eqp.fw_qp)
>>> + mlx5_vdpa_event_qp_destroy(&virtq->eqp);
>>> + return 0;
>>> +}
>>> +
>>> +void
>>> +mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv) {
>>> + struct mlx5_vdpa_virtq *entry;
>>> + struct mlx5_vdpa_virtq *next;
>>> +
>>> + entry = SLIST_FIRST(&priv->virtq_list);
>>> + while (entry) {
>>> + next = SLIST_NEXT(entry, next);
>>> + mlx5_vdpa_virtq_unset(entry);
>>> + SLIST_REMOVE(&priv->virtq_list, entry, mlx5_vdpa_virtq,
>> next);
>>> + rte_free(entry);
>>> + entry = next;
>>> + }
>>> + SLIST_INIT(&priv->virtq_list);
>>> + if (priv->tis) {
>>> + claim_zero(mlx5_devx_cmd_destroy(priv->tis));
>>> + priv->tis = NULL;
>>> + }
>>> + if (priv->td) {
>>> + claim_zero(mlx5_devx_cmd_destroy(priv->td));
>>> + priv->td = NULL;
>>> + }
>>> +}
>>> +
>>> +static uint64_t
>>> +mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva) {
>>> + struct rte_vhost_mem_region *reg;
>>> + uint32_t i;
>>> + uint64_t gpa = 0;
>>> +
>>> + for (i = 0; i < mem->nregions; i++) {
>>> + reg = &mem->regions[i];
>>> + if (hva >= reg->host_user_addr &&
>>> + hva < reg->host_user_addr + reg->size) {
>>> + gpa = hva - reg->host_user_addr + reg-
>>> guest_phys_addr;
>>> + break;
>>> + }
>>> + }
>>> + return gpa;
>>> +}
>>
>> I think you may need a third parameter for the size to map.
>> Otherwise, you would be vulnerable to CVE-2018-1059.
>
> Yes, I just read it and understood that the virtio descriptor queues\packets
> data may be non continues in the guest physical memory and even maybe
> undefined here in the rte_vhost library, Is it?
>
> Don't you think that the rte_vhost should validate it? at least, that all the
> queues memory are mapped?
I just checked vhost lib again, and you're right, it already does the
check.
Basically, if translate_ring_addresses() fail because the rings aren't
fully mapped, then virtio_is_ready() will return false and so the
vdpa .dev_conf() callback won't be called.
> Can you extend more why it may happen? QEMU bug?
It could happen with a malicious or compromised vhost-user
master, like Qemu or Virtio-user based application.
> In any case,
> From Mellanox perspective, at least for the packet data, it is OK since if
> the guest will try to access physical address which is not mapped the packet
> will be ignored by the HW.
Ok!
Thanks,
Maxime