> -----Original Message-----
> From: Dekel Peled <dek...@mellanox.com>
> Sent: Wednesday, October 24, 2018 2:08 PM
> To: Yongseok Koh <ys...@mellanox.com>; Shahaf Shuler
> <shah...@mellanox.com>
> Cc: dev@dpdk.org; Ori Kam <or...@mellanox.com>
> Subject: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs
> 
> This patch adds support for VXLAN encap and decap operations, in
> Direct Verbs flow.
> 
> Signed-off-by: Dekel Peled <dek...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow.h    |   4 +
>  drivers/net/mlx5/mlx5_flow_dv.c | 409
> +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 408 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 38635c9..9c28e50 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -92,6 +92,8 @@
>  #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)
>  #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)
>  #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
> +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22)
> +#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23)
> 
>  #define MLX5_FLOW_FATE_ACTIONS \
>       (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE |
> MLX5_FLOW_ACTION_RSS)
> @@ -181,6 +183,8 @@ struct mlx5_flow_dv {
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>       struct mlx5dv_flow_action_attr
> actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
>       /**< Action list. */
> +     struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */
> +     struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */

Why do we need both encap_verb and decap_verb?

>  #endif
>       int actions_n; /**< number of actions. */
>  };
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c
> index e8f409f..06ecabf 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -35,6 +35,16 @@
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> +#define MLX5_UDP     17
> +#define MLX5_TCP     6
> +#define MLX5_GRE     47

Please use the already existing defines (e.g. IPPROTO_TCP).

> +
> +/*
> + * Encap buf length, max:
> + *   Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14
> + */
> +#define MLX5_ENCAP_LEN 132
> +
>  /**
>   * Validate META item.
>   *
> @@ -97,6 +107,331 @@
>  }
> 
>  /**
> + * Validate the vxlan encap action.
> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the encap action.
> + * @param[in] attr
> + *   Pointer to flow attributes
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_vxlan_encap(uint64_t action_flags,
> +                                 const struct rte_flow_action *action,
> +                                 const struct rte_flow_attr *attr,
> +                                 struct rte_flow_error *error)
> +{
> +     const struct rte_flow_action_vxlan_encap *vxlan_encap = action->conf;
> +
> +     if (!vxlan_encap)
> +             return rte_flow_error_set(error, EINVAL,
> +                                       RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> +                                       "configuration cannot be null");
> +     if (action_flags & MLX5_FLOW_ACTION_DROP)
> +             return rte_flow_error_set(error, EINVAL,
> +                                       RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +                                       "can't drop and encap in same flow");
> +     if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> +             return rte_flow_error_set(error, EINVAL,
> +                                       RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +                                       "can't have 2 encap actions in same"
> +                                       " flow");
> +     if (attr->ingress)
> +             return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +                                       NULL,
> +                                       "encap action not supported for "
> +                                       "ingress");
> +     return 0;
> +}
> +
> +/**
> + * Validate the vxlan decap action.
> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the decap action.
> + * @param[in] attr
> + *   Pointer to flow attributes
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_vxlan_decap(uint64_t action_flags,
> +                     const struct rte_flow_action *action __rte_unused,
> +                     const struct rte_flow_attr *attr,
> +                     struct rte_flow_error *error)
> +{
> +     if (action_flags & MLX5_FLOW_ACTION_DROP)
> +             return rte_flow_error_set(error, EINVAL,
> +                                       RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +                                       "can't drop and decap in same flow");
> +     if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> +             return rte_flow_error_set(error, EINVAL,
> +                                       RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +                                       "can't encap and decap in same
> flow");
> +     if (attr->egress)
> +             return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +                                       NULL,
> +                                       "decap action not supported for "
> +                                       "egress");

Why ENOTSUP and not EINVAL?

> +     return 0;
> +}
> +
> +static size_t item_len[] = {
> +     [RTE_FLOW_ITEM_TYPE_VOID] = 0,
> +     [RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth),
> +     [RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan),
> +     [RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4),
> +     [RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6),
> +     [RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp),
> +     [RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp),
> +     [RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct rte_flow_item_vxlan),
> +     [RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre),
> +     [RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct rte_flow_item_gre),
> +     [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct
> rte_flow_item_vxlan_gpe),
> +     [RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls),
> +};
> +
> +/**
> + * Convert the encap action data from rte_flow_item to raw buffer
> + *
> + * @param[in] item
> + *   Pointer to rte_flow_item object.
> + * @param[out] buf
> + *   Pointer to the output buffer.
> + * @param[out] size
> + *   Pointer to the output buffer size.
> + * @param[out] error
> + *   Pointer to the error structure.
> + * @param[in] l3_type
> + *   ???.

What is "???"? The description of l3_type is missing.

> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t *size,
> +                   struct rte_flow_error *error, uint16_t l3_type)

What is the l3_type used for?

> +{
> +     struct ether_hdr *eth = NULL;
> +     struct vlan_hdr *vlan = NULL;
> +     struct ipv4_hdr *ipv4 = NULL;
> +     struct ipv6_hdr *ipv6 = NULL;
> +     struct udp_hdr *udp = NULL;
> +     struct vxlan_hdr *vxlan = NULL;
> +     const struct rte_flow_item_vlan *vlan_spec;

Why does vlan have a dedicated variable?

> +     size_t len;
> +
> +     assert(item);

Why assert on the item? It should be valid, and if it is not, return an error.

> +     *size = 0;

Why not use a local temp_size variable and avoid the repeated memory access through *size?

> +     for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> +             /* TODO: variable length handling: raw, vxlan and nvgre. */

Why TODO?

> +             len = item_len[item->type];

This can result in a crash if the type is not one of those you expected (item_len[] is indexed without a bounds check).

> +             if (len + *size > MLX5_ENCAP_LEN)
> +                     return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +                                               (void *)item->type,
> +                                               "invalid item length");
> +             rte_memcpy((void *)&buf[*size], item->spec, len);
> +             switch (item->type) {
> +             case RTE_FLOW_ITEM_TYPE_ETH:
> +                     eth = (void *)&buf[*size];

Why cast to void? Same for all others.

> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_VLAN:
> +                     vlan_spec = item->spec;
> +                     vlan = (void *)&buf[*size];
> +                     if (!eth)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "eth header not found");
> +                     vlan->vlan_tci = vlan_spec->tci;
> +                     vlan->eth_proto = vlan_spec->inner_type;
> +                     if (!eth->ether_type)
> +                             eth->ether_type = htons(ETHER_TYPE_VLAN);

Why not use rte_cpu_to_be_16()/rte_cpu_to_be_32() instead of htons()/htonl()? Same for all the others.

> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_IPV4:
> +                     ipv4 = (void *)&buf[*size];
> +                     if (!vlan && !eth)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "neither eth nor vlan header
> found");
> +                     if (vlan && !vlan->eth_proto)
> +                             vlan->eth_proto = htons(ETHER_TYPE_IPv4);
> +                     else if (eth && !eth->ether_type)
> +                             eth->ether_type = htons(ETHER_TYPE_IPv4);
> +                     if (!ipv4->version_ihl)
> +                             ipv4->version_ihl = 0x45;
> +                     if (!ipv4->time_to_live)
> +                             ipv4->time_to_live = 0x40;
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_IPV6:
> +                     ipv6 = (void *)&buf[*size];
> +                     if (!vlan && !eth)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "neither eth nor vlan header
> found");
> +                     if (vlan && !vlan->eth_proto)
> +                             vlan->eth_proto = htons(ETHER_TYPE_IPv6);
> +                     else if (eth && !eth->ether_type)
> +                             eth->ether_type = htons(ETHER_TYPE_IPv6);
> +                     if (!ipv6->vtc_flow)
> +                             ipv6->vtc_flow = htonl(0x60000000);
> +                     if (!ipv6->hop_limits)
> +                             ipv6->hop_limits = 0xff;
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_UDP:
> +                     udp = (void *)&buf[*size];
> +                     if (!ipv4 && !ipv6)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "ip header not found");
> +                     if (ipv4 && !ipv4->next_proto_id)
> +                             ipv4->next_proto_id = MLX5_UDP;
> +                     else if (ipv6 && !ipv6->proto)
> +                             ipv6->proto = MLX5_UDP;
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_VXLAN:
> +                     vxlan = (void *)&buf[*size];
> +                     if (!udp)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "udp header not found");
> +                     if (!udp->dst_port)
> +                             udp->dst_port =
> htons(MLX5_UDP_PORT_VXLAN);
> +                     if (!vxlan->vx_flags)
> +                             vxlan->vx_flags = htonl(0x08000000);
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> +                     vxlan = (void *)&buf[*size];
> +                     if (!udp)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "udp header not found");
> +                     if (!udp->dst_port)
> +                             udp->dst_port =
> htons(MLX5_UDP_PORT_VXLAN_GPE);
> +                     if (!vxlan->vx_flags)
> +                             vxlan->vx_flags = htonl(0x0c000003);
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_GRE:
> +             case RTE_FLOW_ITEM_TYPE_NVGRE:
> +                     if (!ipv4 && !ipv6)
> +                             return rte_flow_error_set(error, EINVAL,
> +
>       RTE_FLOW_ERROR_TYPE_ACTION,
> +                                             (void *)item->type,
> +                                             "ip header not found");
> +                     if (ipv4 && !ipv4->next_proto_id)
> +                             ipv4->next_proto_id = htons(MLX5_GRE);
> +                     else if (ipv6 && !ipv6->proto)
> +                             ipv6->proto = htons(MLX5_GRE);
> +                     break;
> +             case RTE_FLOW_ITEM_TYPE_VOID:
> +                     break;
> +             default:
> +                     return rte_flow_error_set(error, EINVAL,
> +                                     RTE_FLOW_ERROR_TYPE_ACTION,
> +                                     (void *)item->type,
> +                                     "unsupported item type");
> +                     break;
> +             }
> +             *size += len;
> +     }
> +     if (l3_type && vlan)
> +             vlan->eth_proto = htons(l3_type);
> +     else if (l3_type && eth)
> +             eth->ether_type = htons(l3_type);
> +     return 0;
> +}
> +
> +/**
> + * Convert VXLAN encap action to DV specification.
> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[in] action
> + *   Pointer to action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> + */
> +static struct ibv_flow_action *
> +flow_dv_create_vxlan_encap(struct rte_eth_dev *dev,
> +                        const struct rte_flow_action *action,
> +                        struct rte_flow_error *error)
> +{
> +     struct ibv_flow_action *encap_verb = NULL;
> +     const struct rte_flow_action_vxlan_encap *encap_data;
> +     struct priv *priv = dev->data->dev_private;
> +     uint8_t buf[MLX5_ENCAP_LEN];
> +     size_t size = 0;
> +     int convert_result;
> +
> +     encap_data = (const struct rte_flow_action_vxlan_encap *)action-
> >conf;
> +     convert_result = flow_dv_convert_encap(encap_data->definition,
> +                                            buf, &size, error, 0);
> +     if (convert_result)
> +             return NULL;
> +     encap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> +             (priv->ctx, size, (size ? buf : NULL),
> +
> MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
> +              MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
> +     if (!encap_verb)
> +             rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> +                                NULL, "cannot create vxlan encap action");
> +     return encap_verb;
> +}
> +
> +/**
> + * Convert VXLAN decap action to DV specification.
> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[in] action
> + *   Pointer to action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> + */
> +static struct ibv_flow_action *
> +flow_dv_create_vxlan_decap(struct rte_eth_dev *dev,
> +                        const struct rte_flow_action *action __rte_unused,
> +                        struct rte_flow_error *error)
> +{
> +     struct ibv_flow_action *decap_verb = NULL;
> +     struct priv *priv = dev->data->dev_private;
> +
> +     decap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> +             (priv->ctx, 0, NULL,
> +
> MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2,
> +              MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
> +     if (!decap_verb)
> +             rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> +                                NULL, "cannot create decap action");
> +     return decap_verb;
> +}
> +
> +/**
>   * Verify the @p attributes will be correctly understood by the NIC and store
>   * them in the @p flow if everything is correct.
>   *
> @@ -347,6 +682,24 @@
>                       action_flags |= MLX5_FLOW_ACTION_COUNT;
>                       ++actions_n;
>                       break;
> +             case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +                     ret =
> flow_dv_validate_action_vxlan_encap(action_flags,
> +                                                               actions, attr,
> +                                                               error);
> +                     if (ret < 0)
> +                             return ret;
> +                     action_flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +                     ++actions_n;
> +                     break;
> +             case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +                     ret =
> flow_dv_validate_action_vxlan_decap(action_flags,
> +                                                               actions, attr,
> +                                                               error);
> +                     if (ret < 0)
> +                             return ret;
> +                     action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> +                     ++actions_n;
> +                     break;
>               default:
>                       return rte_flow_error_set(error, ENOTSUP,
> 
> RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -1056,14 +1409,23 @@
>  /**
>   * Store the requested actions in an array.
>   *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
>   * @param[in] action
>   *   Flow action to translate.
>   * @param[in, out] dev_flow
>   *   Pointer to the mlx5_flow.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
> -static void
> -flow_dv_create_action(const struct rte_flow_action *action,
> -                   struct mlx5_flow *dev_flow)
> +static int
> +flow_dv_create_action(struct rte_eth_dev *dev,
> +                   const struct rte_flow_action *action,
> +                   struct mlx5_flow *dev_flow,
> +                   struct rte_flow_error *error)
>  {
>       const struct rte_flow_action_queue *queue;
>       const struct rte_flow_action_rss *rss;
> @@ -1110,10 +1472,35 @@
>               /* Added to array only in apply since we need the QP */
>               flow->actions |= MLX5_FLOW_ACTION_RSS;
>               break;
> +     case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +             dev_flow->dv.actions[actions_n].type =
> +                     MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> +             dev_flow->dv.actions[actions_n].action =
> +                             flow_dv_create_vxlan_encap(dev, action,
> error);
> +             if (!(dev_flow->dv.actions[actions_n].action))
> +                     return -rte_errno;
> +             dev_flow->dv.encap_verb =
> +                     dev_flow->dv.actions[actions_n].action;
> +             flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +             actions_n++;
> +             break;
> +     case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +             dev_flow->dv.actions[actions_n].type =
> +                     MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> +             dev_flow->dv.actions[actions_n].action =
> +                             flow_dv_create_vxlan_decap(dev, action,
> error);
> +             if (!(dev_flow->dv.actions[actions_n].action))
> +                     return -rte_errno;
> +             dev_flow->dv.decap_verb =
> +                     dev_flow->dv.actions[actions_n].action;
> +             flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> +             actions_n++;
> +             break;
>       default:
>               break;
>       }
>       dev_flow->dv.actions_n = actions_n;
> +     return 0;
>  }
> 
>  static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
> @@ -1279,8 +1666,10 @@
>       matcher.egress = attr->egress;
>       if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
>               return -rte_errno;
> -     for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
> -             flow_dv_create_action(actions, dev_flow);
> +     for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> +             if (flow_dv_create_action(dev, actions, dev_flow, error))
> +                     return -rte_errno;
> +     }
>       return 0;
>  }
> 
> @@ -1465,6 +1854,16 @@
>               LIST_REMOVE(dev_flow, next);
>               if (dev_flow->dv.matcher)
>                       flow_dv_matcher_release(dev, dev_flow);
> +             if (dev_flow->dv.encap_verb) {
> +                     claim_zero(mlx5_glue->destroy_flow_action
> +                                             (dev_flow->dv.encap_verb));
> +                     dev_flow->dv.encap_verb = NULL;
> +             }
> +             if (dev_flow->dv.decap_verb) {
> +                     claim_zero(mlx5_glue->destroy_flow_action
> +                                             (dev_flow->dv.decap_verb));
> +                     dev_flow->dv.decap_verb = NULL;
> +             }
>               rte_free(dev_flow);
>       }
>  }
> --
> 1.8.3.1


Thanks,
Ori

Reply via email to