From: Viacheslav Ovsiienko
> On ESXi setups with SR-IOV and E-Switch enabled there is a problem
> receiving VLAN traffic on VF interfaces. The NIC driver in the ESXi
> hypervisor does not set up the E-Switch vport settings correctly, and VLAN
> traffic targeted to the VF is dropped.
> 
> This patch provides a temporary workaround: when a rule containing a
> VLAN pattern is installed on a VF, a VLAN network interface is created
> on top of the VF, as the following command would do:
> 
>   ip link add link vf.if name evmlx.1.100 type vlan id 100
> 
> The PMD maintains a database of the VLAN interfaces created for each
> existing VF and requested VLAN tag. When all of the RTE flows using a
> given VLAN tag are removed, the VLAN interface created for that tag is
> deleted.
> 
> The name of the created VLAN interface follows the format:
> 
>   evmlx.d1.d2, where d1 is the VF interface ifindex and d2 is the VLAN tag
> 
> Implementation limitations:
> 
> - VLAN tag masks are not supported: a rule must specify the VLAN tag
>   exactly (full 12-bit VID mask), wildcards implemented via masks are rejected
> 
> - the virtual environment is detected via the rte_hypervisor_get() call,
>   which currently checks the RTE_CPUFLAG_HYPERVISOR flag on x86
>   platforms. On other architectures the workaround is always
>   applied to flows created over a PCI VF
> 
> Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>

After rebase, 
Acked-by: Matan Azrad <ma...@mellanox.com>

> ---
>  drivers/net/mlx5/mlx5.c            |   6 +
>  drivers/net/mlx5/mlx5.h            |  30 ++++
>  drivers/net/mlx5/mlx5_flow.c       |  22 +++
>  drivers/net/mlx5/mlx5_flow.h       |   5 +
>  drivers/net/mlx5/mlx5_flow_dv.c    |  33 ++++-
>  drivers/net/mlx5/mlx5_flow_verbs.c |  25 +++-
>  drivers/net/mlx5/mlx5_nl.c         | 279
> +++++++++++++++++++++++++++++++++++++
>  7 files changed, 396 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> d93f92d..8549167 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -690,6 +690,8 @@ struct mlx5_dev_spawn_data {
>               close(priv->nl_socket_route);
>       if (priv->nl_socket_rdma >= 0)
>               close(priv->nl_socket_rdma);
> +     if (priv->esxi_context)
> +             mlx5_vlan_esxi_exit(priv->esxi_context);
>       if (priv->sh) {
>               /*
>                * Free the shared context in last turn, because the cleanup
> @@ -1546,6 +1548,8 @@ struct mlx5_dev_spawn_data {  #endif
>       /* Store device configuration on private structure. */
>       priv->config = config;
> +     /* Create context for virtual machine VLAN workaround. */
> +     priv->esxi_context = mlx5_vlan_esxi_init(eth_dev, spawn->ifindex);
>       if (config.dv_flow_en) {
>               err = mlx5_alloc_shared_dr(priv);
>               if (err)
> @@ -1572,6 +1576,8 @@ struct mlx5_dev_spawn_data {
>                       close(priv->nl_socket_route);
>               if (priv->nl_socket_rdma >= 0)
>                       close(priv->nl_socket_rdma);
> +             if (priv->esxi_context)
> +                     mlx5_vlan_esxi_exit(priv->esxi_context);
>               if (own_domain_id)
>                       claim_zero(rte_eth_switch_domain_free(priv-
> >domain_id));
>               rte_free(priv);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 5af3f41..87afa7a 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -231,6 +231,27 @@ enum mlx5_verbs_alloc_type {
>       MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
>  };
> 
> +/* VLAN netdev for ESXi VLAN workaround. */ struct mlx5_vlan_dev {
> +     uint32_t refcnt;
> +     uint32_t ifindex; /**< Own interface index. */ };
> +
> +/* Structure for VF ESXi VLAN workaround. */ struct mlx5_vf_vlan {
> +     uint32_t tag:12;
> +     uint32_t created:1;
> +};
> +
> +/* Array of VLAN devices created on the base of VF */ struct
> +mlx5_vlan_esxi_context {
> +     int nl_socket;
> +     uint32_t nl_sn;
> +     uint32_t vf_ifindex;
> +     struct rte_eth_dev *dev;
> +     struct mlx5_vlan_dev vlan_dev[4096];
> +};
> +
>  /**
>   * Verbs allocator needs a context to know in the callback which kind of
>   * resources it is allocating.
> @@ -386,6 +407,7 @@ struct mlx5_priv {
>       int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
>       int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
>       uint32_t nl_sn; /* Netlink message sequence number. */
> +     struct mlx5_vlan_esxi_context *esxi_context; /* ESXi VLAN context.
> */
>  #ifndef RTE_ARCH_64
>       rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
>       rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX]; @@ -582,6
> +604,14 @@ int mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct
> rte_ether_addr *mac,  int mlx5_nl_switch_info(int nl, unsigned int ifindex,
>                       struct mlx5_switch_info *info);
> 
> +struct mlx5_vlan_esxi_context *mlx5_vlan_esxi_init(struct rte_eth_dev
> *dev,
> +                                                uint32_t ifindex);
> +void mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *ctx); void
> +mlx5_vlan_esxi_release(struct rte_eth_dev *dev,
> +                         struct mlx5_vf_vlan *vf_vlan);
> +void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,
> +                         struct mlx5_vf_vlan *vf_vlan);
> +
>  /* mlx5_devx_cmds.c */
> 
>  int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx, diff --git
> a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> 4ba34db..42743d2 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1200,6 +1200,8 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,
>   *   Item specification.
>   * @param[in] item_flags
>   *   Bit-fields that holds the items detected until now.
> + * @param[in] dev
> + *   Ethernet device flow is being created on.
>   * @param[out] error
>   *   Pointer to error structure.
>   *
> @@ -1209,6 +1211,7 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,  int  mlx5_flow_validate_item_vlan(const
> struct rte_flow_item *item,
>                            uint64_t item_flags,
> +                          struct rte_eth_dev *dev,
>                            struct rte_flow_error *error)
>  {
>       const struct rte_flow_item_vlan *spec = item->spec; @@ -1243,6
> +1246,25 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
> int32_t priority,
>                                       error);
>       if (ret)
>               return ret;
> +     if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
> +             struct mlx5_priv *priv = dev->data->dev_private;
> +
> +             if (priv->esxi_context) {
> +                     /*
> +                      * Non-NULL context means we have a virtual
> machine
> +                      * and SR-IOV enabled, we have to create VLAN
> interface
> +                      * to make hypervisor (ESXi) to setup E-Switch vport
> +                      * context correctly. We avoid creating the multiple
> +                      * VLAN interfaces, so we cannot support VLAN tag
> mask.
> +                      */
> +                     return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ITEM,
> +                                               item,
> +                                               "VLAN tag mask is not"
> +                                               " supported in virtual"
> +                                               " environment");
> +             }
> +     }
>       if (spec) {
>               vlan_tag = spec->tci;
>               vlan_tag &= mask->tci;
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 72b339e..ac20572 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -318,6 +318,8 @@ struct mlx5_flow_dv {
>       /**< Pointer to the jump action resource. */
>       struct mlx5_flow_dv_port_id_action_resource *port_id_action;
>       /**< Pointer to port ID action resource. */
> +     struct mlx5_vf_vlan vf_vlan;
> +     /**< Structure for VF ESXi VLAN workaround. */
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>       void *actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
>       /**< Action list. */
> @@ -343,6 +345,8 @@ struct mlx5_flow_verbs {
>       struct ibv_flow *flow; /**< Verbs flow pointer. */
>       struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
>       uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
> +     struct mlx5_vf_vlan vf_vlan;
> +     /**< Structure for VF ESXi VLAN workaround. */
>  };
> 
>  /** Device flow structure. */
> @@ -507,6 +511,7 @@ int mlx5_flow_validate_item_udp(const struct
> rte_flow_item *item,
>                               struct rte_flow_error *error);
>  int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
>                                uint64_t item_flags,
> +                              struct rte_eth_dev *dev,
>                                struct rte_flow_error *error);
>  int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
>                                 uint64_t item_flags,
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 3fa624b..63183b5 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -2363,7 +2363,7 @@ struct field_modify_info modify_tcp[] = {
>                       break;
>               case RTE_FLOW_ITEM_TYPE_VLAN:
>                       ret = mlx5_flow_validate_item_vlan(items,
> item_flags,
> -                                                        error);
> +                                                        dev, error);
>                       if (ret < 0)
>                               return ret;
>                       last_item = tunnel ?
> MLX5_FLOW_LAYER_INNER_VLAN :
> @@ -2914,6 +2914,8 @@ struct field_modify_info modify_tcp[] = {
>  /**
>   * Add VLAN item to matcher and to the value.
>   *
> + * @param[in, out] dev_flow
> + *   Flow descriptor.
>   * @param[in, out] matcher
>   *   Flow matcher.
>   * @param[in, out] key
> @@ -2924,7 +2926,8 @@ struct field_modify_info modify_tcp[] = {
>   *   Item is inner pattern.
>   */
>  static void
> -flow_dv_translate_item_vlan(void *matcher, void *key,
> +flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,
> +                         void *matcher, void *key,
>                           const struct rte_flow_item *item,
>                           int inner)
>  {
> @@ -2951,6 +2954,12 @@ struct field_modify_info modify_tcp[] = {
>               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
>                                        outer_headers);
>               headers_v = MLX5_ADDR_OF(fte_match_param, key,
> outer_headers);
> +             /*
> +              * This is workaround, masks are not supported,
> +              * and pre-validated.
> +              */
> +             dev_flow->dv.vf_vlan.tag =
> +                     rte_be_to_cpu_16(vlan_v->tci) & 0x0fff;
>       }
>       tci_m = rte_be_to_cpu_16(vlan_m->tci);
>       tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); @@ -4443,7
> +4452,8 @@ struct field_modify_info modify_tcp[] = {
>                                            MLX5_FLOW_LAYER_OUTER_L2;
>                       break;
>               case RTE_FLOW_ITEM_TYPE_VLAN:
> -                     flow_dv_translate_item_vlan(match_mask,
> match_value,
> +                     flow_dv_translate_item_vlan(dev_flow,
> +                                                 match_mask, match_value,
>                                                   items, tunnel);
>                       matcher.priority = MLX5_PRIORITY_MAP_L2;
>                       last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2
> | @@ -4658,6 +4668,17 @@ struct field_modify_info modify_tcp[] = {
>                                          "hardware refuses to create flow");
>                       goto error;
>               }
> +             if (priv->esxi_context &&
> +                 dev_flow->dv.vf_vlan.tag &&
> +                 !dev_flow->dv.vf_vlan.created) {
> +                     /*
> +                      * The rule contains the VLAN pattern.
> +                      * For VF we are going to create VLAN
> +                      * interface to make ESXi set correct
> +                      * e-Switch vport context.
> +                      */
> +                     mlx5_vlan_esxi_acquire(dev, &dev_flow-
> >dv.vf_vlan);
> +             }
>       }
>       return 0;
>  error:
> @@ -4671,6 +4692,9 @@ struct field_modify_info modify_tcp[] = {
>                               mlx5_hrxq_release(dev, dv->hrxq);
>                       dv->hrxq = NULL;
>               }
> +             if (dev_flow->dv.vf_vlan.tag &&
> +                 dev_flow->dv.vf_vlan.created)
> +                     mlx5_vlan_esxi_release(dev, &dev_flow-
> >dv.vf_vlan);
>       }
>       rte_errno = err; /* Restore rte_errno. */
>       return -rte_errno;
> @@ -4871,6 +4895,9 @@ struct field_modify_info modify_tcp[] = {
>                               mlx5_hrxq_release(dev, dv->hrxq);
>                       dv->hrxq = NULL;
>               }
> +             if (dev_flow->dv.vf_vlan.tag &&
> +                 dev_flow->dv.vf_vlan.created)
> +                     mlx5_vlan_esxi_release(dev, &dev_flow-
> >dv.vf_vlan);
>       }
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 2f4c80c..5909488 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -386,6 +386,9 @@
>               flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
>       else
>               flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
> +     if (!tunnel)
> +             dev_flow->verbs.vf_vlan.tag =
> +                     rte_be_to_cpu_16(spec->tci) & 0x0fff;
>  }
> 
>  /**
> @@ -1049,7 +1052,7 @@
>                       break;
>               case RTE_FLOW_ITEM_TYPE_VLAN:
>                       ret = mlx5_flow_validate_item_vlan(items,
> item_flags,
> -                                                        error);
> +                                                        dev, error);
>                       if (ret < 0)
>                               return ret;
>                       last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2
> | @@ -1587,6 +1590,10 @@
>                               mlx5_hrxq_release(dev, verbs->hrxq);
>                       verbs->hrxq = NULL;
>               }
> +             if (dev_flow->verbs.vf_vlan.tag &&
> +                 dev_flow->verbs.vf_vlan.created) {
> +                     mlx5_vlan_esxi_release(dev, &dev_flow-
> >verbs.vf_vlan);
> +             }
>       }
>  }
> 
> @@ -1634,6 +1641,7 @@
>  flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
>                struct rte_flow_error *error)
>  {
> +     struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_flow_verbs *verbs;
>       struct mlx5_flow *dev_flow;
>       int err;
> @@ -1683,6 +1691,17 @@
>                                          "hardware refuses to create flow");
>                       goto error;
>               }
> +             if (priv->esxi_context &&
> +                 dev_flow->verbs.vf_vlan.tag &&
> +                 !dev_flow->verbs.vf_vlan.created) {
> +                     /*
> +                      * The rule contains the VLAN pattern.
> +                      * For VF we are going to create VLAN
> +                      * interface to make ESXi set correct
> +                      * e-Switch vport context.
> +                      */
> +                     mlx5_vlan_esxi_acquire(dev, &dev_flow-
> >verbs.vf_vlan);
> +             }
>       }
>       return 0;
>  error:
> @@ -1696,6 +1715,10 @@
>                               mlx5_hrxq_release(dev, verbs->hrxq);
>                       verbs->hrxq = NULL;
>               }
> +             if (dev_flow->verbs.vf_vlan.tag &&
> +                 dev_flow->verbs.vf_vlan.created) {
> +                     mlx5_vlan_esxi_release(dev, &dev_flow-
> >verbs.vf_vlan);
> +             }
>       }
>       rte_errno = err; /* Restore rte_errno. */
>       return -rte_errno;
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> 5773fa7..8516442 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -12,11 +12,14 @@
>  #include <stdbool.h>
>  #include <stdint.h>
>  #include <stdlib.h>
> +#include <stdalign.h>
>  #include <string.h>
>  #include <sys/socket.h>
>  #include <unistd.h>
> 
>  #include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_hypervisor.h>
> 
>  #include "mlx5.h"
>  #include "mlx5_utils.h"
> @@ -28,6 +31,8 @@
>  /* Receive buffer size for the Netlink socket */  #define
> MLX5_RECV_BUF_SIZE 32768
> 
> +/** Parameters of VLAN devices created by driver. */ #define
> +MLX5_ESXI_VLAN_DEVICE_PFX "evmlx"
>  /*
>   * Define NDA_RTA as defined in iproute2 sources.
>   *
> @@ -987,3 +992,277 @@ struct mlx5_nl_ifindex_data {
>       }
>       return ret;
>  }
> +
> +/*
> + * Delete VLAN network device by ifindex.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_vlan_esxi_init().
> + * @param[in] ifindex
> + *   Interface index of network device to delete.
> + */
> +static void
> +mlx5_vlan_esxi_delete(struct mlx5_vlan_esxi_context *esxi,
> +                   uint32_t ifindex)
> +{
> +     int ret;
> +     struct {
> +             struct nlmsghdr nh;
> +             struct ifinfomsg info;
> +     } req = {
> +             .nh = {
> +                     .nlmsg_len = NLMSG_LENGTH(sizeof(struct
> ifinfomsg)),
> +                     .nlmsg_type = RTM_DELLINK,
> +                     .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
> +             },
> +             .info = {
> +                     .ifi_family = AF_UNSPEC,
> +                     .ifi_index = ifindex,
> +             },
> +     };
> +
> +     if (ifindex) {
> +             ++esxi->nl_sn;
> +             if (!esxi->nl_sn)
> +                     ++esxi->nl_sn;
> +             ret = mlx5_nl_send(esxi->nl_socket, &req.nh, esxi->nl_sn);
> +             if (ret >= 0)
> +                     ret = mlx5_nl_recv(esxi->nl_socket,
> +                                        esxi->nl_sn,
> +                                        NULL, NULL);
> +             if (ret < 0)
> +                     DRV_LOG(WARNING, "netlink: error deleting"
> +                                      " VLAN ESXi ifindex %u, %d",
> +                                      ifindex, ret);
> +     }
> +}
> +
> +/* Set of subroutines to build Netlink message. */ static struct nlattr
> +* nl_msg_tail(struct nlmsghdr *nlh) {
> +     return (struct nlattr *)
> +             (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); }
> +
> +static void
> +nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
> +{
> +     struct nlattr *nla = nl_msg_tail(nlh);
> +
> +     nla->nla_type = type;
> +     nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen);
> +     nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len;
> +
> +     if (alen)
> +             memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); }
> +
> +static struct nlattr *
> +nl_attr_nest_start(struct nlmsghdr *nlh, int type) {
> +     struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
> +
> +     nl_attr_put(nlh, type, NULL, 0);
> +     return nest;
> +}
> +
> +static void
> +nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) {
> +     nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; }
> +
> +/*
> + * Create network VLAN device with specified VLAN tag.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_vlan_esxi_init().
> + * @param[in] ifindex
> + *   Base network interface index.
> + * @param[in] tag
> + *   VLAN tag for VLAN network device to create.
> + */
> +static uint32_t
> +mlx5_vlan_esxi_create(struct mlx5_vlan_esxi_context *esxi,
> +                   uint32_t ifindex,
> +                   uint16_t tag)
> +{
> +     struct nlmsghdr *nlh;
> +     struct ifinfomsg *ifm;
> +     char name[sizeof(MLX5_ESXI_VLAN_DEVICE_PFX) + 32];
> +
> +     alignas(RTE_CACHE_LINE_SIZE)
> +     uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
> +                 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
> +                 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
> +                 NLMSG_ALIGN(sizeof(uint32_t)) +
> +                 NLMSG_ALIGN(sizeof(name)) +
> +                 NLMSG_ALIGN(sizeof("vlan")) +
> +                 NLMSG_ALIGN(sizeof(uint32_t)) +
> +                 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
> +     struct nlattr *na_info;
> +     struct nlattr *na_vlan;
> +     int ret;
> +
> +     memset(buf, 0, sizeof(buf));
> +     ++esxi->nl_sn;
> +     if (!esxi->nl_sn)
> +             ++esxi->nl_sn;
> +     nlh = (struct nlmsghdr *)buf;
> +     nlh->nlmsg_len = sizeof(struct nlmsghdr);
> +     nlh->nlmsg_type = RTM_NEWLINK;
> +     nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
> +                        NLM_F_EXCL | NLM_F_ACK;
> +     ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
> +     nlh->nlmsg_len += sizeof(struct ifinfomsg);
> +     ifm->ifi_family = AF_UNSPEC;
> +     ifm->ifi_type = 0;
> +     ifm->ifi_index = 0;
> +     ifm->ifi_flags = IFF_UP;
> +     ifm->ifi_change = 0xffffffff;
> +     nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
> +     ret = snprintf(name, sizeof(name), "%s.%u.%u",
> +                    MLX5_ESXI_VLAN_DEVICE_PFX, ifindex, tag);
> +     nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
> +     na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
> +     nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
> +     na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
> +     nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
> +     nl_attr_nest_end(nlh, na_vlan);
> +     nl_attr_nest_end(nlh, na_info);
> +     assert(sizeof(buf) >= nlh->nlmsg_len);
> +     ret = mlx5_nl_send(esxi->nl_socket, nlh, esxi->nl_sn);
> +     if (ret >= 0)
> +             ret = mlx5_nl_recv(esxi->nl_socket, esxi->nl_sn, NULL,
> NULL);
> +     if (ret < 0) {
> +             DRV_LOG(WARNING,
> +                     "netlink: VLAN %s create failure (%d)",
> +                     name, ret);
> +     }
> +     // Try to get ifindex of created or pre-existing device.
> +     ret = if_nametoindex(name);
> +     if (!ret) {
> +             DRV_LOG(WARNING,
> +                     "VLAN %s failed to get index (%d)",
> +                     name, errno);
> +             return 0;
> +     }
> +     return ret;
> +}
> +
> +/*
> + * Release VLAN network device, created for ESXi workaround.
> + *
> + * @param[in] dev
> + *   Ethernet device object, Netlink context provider.
> + * @param[in] vlan
> + *   Object representing the network device to release.
> + */
> +void mlx5_vlan_esxi_release(struct rte_eth_dev *dev,
> +                         struct mlx5_vf_vlan *vlan)
> +{
> +     struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_vlan_esxi_context *esxi = priv->esxi_context;
> +     struct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];
> +
> +     assert(vlan->created);
> +     assert(priv->esxi_context);
> +     if (!vlan->created || !esxi)
> +             return;
> +     vlan->created = 0;
> +     assert(vlan_dev[vlan->tag].refcnt);
> +     if (--vlan_dev[vlan->tag].refcnt == 0 &&
> +         vlan_dev[vlan->tag].ifindex) {
> +             mlx5_vlan_esxi_delete(esxi, vlan_dev[vlan->tag].ifindex);
> +             vlan_dev[vlan->tag].ifindex = 0;
> +     }
> +}
> +
> +/**
> + * Acquire VLAN interface with specified tag for ESXi workaround.
> + *
> + * @param[in] dev
> + *   Ethernet device object, Netlink context provider.
> + * @param[in] vlan
> + *   Object representing the network device to acquire.
> + */
> +void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,
> +                         struct mlx5_vf_vlan *vlan)
> +{
> +     struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_vlan_esxi_context *esxi = priv->esxi_context;
> +     struct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];
> +
> +     assert(!vlan->created);
> +     assert(priv->esxi_context);
> +     if (vlan->created || !esxi)
> +             return;
> +     if (vlan_dev[vlan->tag].refcnt == 0) {
> +             assert(!vlan_dev[vlan->tag].ifindex);
> +             vlan_dev[vlan->tag].ifindex =
> +                     mlx5_vlan_esxi_create(esxi,
> +                                           esxi->vf_ifindex,
> +                                           vlan->tag);
> +     }
> +     if (vlan_dev[vlan->tag].ifindex) {
> +             vlan_dev[vlan->tag].refcnt++;
> +             vlan->created = 1;
> +     }
> +}
> +
> +/*
> + * Create per ethernet device VLAN ESXi workaround context  */ struct
> +mlx5_vlan_esxi_context * mlx5_vlan_esxi_init(struct rte_eth_dev *dev,
> +                 uint32_t ifindex)
> +{
> +     struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_dev_config *config = &priv->config;
> +     struct mlx5_vlan_esxi_context *esxi;
> +
> +     /* Do not engage workaround over PF. */
> +     if (!config->vf)
> +             return NULL;
> +     /* Check whether there is virtual environment */
> +     if (rte_hypervisor_get() == RTE_HYPERVISOR_NONE)
> +             return NULL;
> +     esxi = rte_zmalloc(__func__, sizeof(*esxi), sizeof(uint32_t));
> +     if (!esxi) {
> +             DRV_LOG(WARNING,
> +                     "Can not allocate memory"
> +                     " for ESXi VLAN context");
> +             return NULL;
> +     }
> +     esxi->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
> +     if (esxi->nl_socket < 0) {
> +             DRV_LOG(WARNING,
> +                     "Can not create Netlink socket"
> +                     " for ESXi VLAN context");
> +             rte_free(esxi);
> +             return NULL;
> +     }
> +     esxi->nl_sn = random();
> +     esxi->vf_ifindex = ifindex;
> +     esxi->dev = dev;
> +     /* Cleanup for existing VLAN devices. */
> +     return esxi;
> +}
> +
> +/*
> + * Destroy per ethernet device VLAN ESXi workaround context  */ void
> +mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *esxi) {
> +     unsigned int i;
> +
> +     /* Delete all remaining VLAN devices. */
> +     for (i = 0; i < RTE_DIM(esxi->vlan_dev); i++) {
> +             if (esxi->vlan_dev[i].ifindex)
> +                     mlx5_vlan_esxi_delete(esxi, esxi-
> >vlan_dev[i].ifindex);
> +     }
> +     if (esxi->nl_socket >= 0)
> +             close(esxi->nl_socket);
> +     rte_free(esxi);
> +}
> --
> 1.8.3.1

Reply via email to