Thanks. I have deferred the series and will send a revised version later.
> -----Original Message----- > From: Ori Kam > Sent: Wednesday, October 24, 2018 3:59 PM > To: Dekel Peled <dek...@mellanox.com>; Yongseok Koh > <ys...@mellanox.com>; Shahaf Shuler <shah...@mellanox.com> > Cc: dev@dpdk.org > Subject: RE: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs > > > > > -----Original Message----- > > From: Dekel Peled <dek...@mellanox.com> > > Sent: Wednesday, October 24, 2018 2:08 PM > > To: Yongseok Koh <ys...@mellanox.com>; Shahaf Shuler > > <shah...@mellanox.com> > > Cc: dev@dpdk.org; Ori Kam <or...@mellanox.com> > > Subject: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs > > > > This patch adds support for VXLAN encap and decap operations, in > > Direct Verbs flow. > > > > Signed-off-by: Dekel Peled <dek...@mellanox.com> > > --- > > drivers/net/mlx5/mlx5_flow.h | 4 + > > drivers/net/mlx5/mlx5_flow_dv.c | 409 > > +++++++++++++++++++++++++++++++++++++++- > > 2 files changed, 408 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h > > b/drivers/net/mlx5/mlx5_flow.h index 38635c9..9c28e50 100644 > > --- a/drivers/net/mlx5/mlx5_flow.h > > +++ b/drivers/net/mlx5/mlx5_flow.h > > @@ -92,6 +92,8 @@ > > #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) #define > > MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) #define > > MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21) > > +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) #define > > +MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23) > > > > #define MLX5_FLOW_FATE_ACTIONS \ > > (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | > > MLX5_FLOW_ACTION_RSS) > > @@ -181,6 +183,8 @@ struct mlx5_flow_dv { #ifdef > > HAVE_IBV_FLOW_DV_SUPPORT > > struct mlx5dv_flow_action_attr > > actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS]; > > /**< Action list. */ > > + struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */ > > + struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */ > > Why do we need encap and decap? > > > #endif > > int actions_n; /**< number of actions. 
*/ }; diff --git > > a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c > > index e8f409f..06ecabf 100644 > > --- a/drivers/net/mlx5/mlx5_flow_dv.c > > +++ b/drivers/net/mlx5/mlx5_flow_dv.c > > @@ -35,6 +35,16 @@ > > > > #ifdef HAVE_IBV_FLOW_DV_SUPPORT > > > > +#define MLX5_UDP 17 > > +#define MLX5_TCP 6 > > +#define MLX5_GRE 47 > > Please use already created defines. (IPPROTO_TCP) > > > + > > +/* > > + * Encap buf length, max: > > + * Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14 > > + */ > > +#define MLX5_ENCAP_LEN 132 > > + > > /** > > * Validate META item. > > * > > @@ -97,6 +107,331 @@ > > } > > > > /** > > + * Validate the vxlan encap action. > > + * > > + * @param[in] action_flags > > + * Holds the actions detected until now. > > + * @param[in] action > > + * Pointer to the encap action. > > + * @param[in] attr > > + * Pointer to flow attributes > > + * @param[out] error > > + * Pointer to error structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. 
> > + */ > > +static int > > +flow_dv_validate_action_vxlan_encap(uint64_t action_flags, > > + const struct rte_flow_action *action, > > + const struct rte_flow_attr *attr, > > + struct rte_flow_error *error) { > > + const struct rte_flow_action_vxlan_encap *vxlan_encap = > > +action->conf; > > + > > + if (!vxlan_encap) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > action, > > + "configuration cannot be null"); > > + if (action_flags & MLX5_FLOW_ACTION_DROP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can't drop and encap in same > flow"); > > + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can't have 2 encap actions in same" > > + " flow"); > > + if (attr->ingress) > > + return rte_flow_error_set(error, ENOTSUP, > > + > > RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, > > + NULL, > > + "encap action not supported for " > > + "ingress"); > > + return 0; > > +} > > + > > +/** > > + * Validate the vxlan decap action. > > + * > > + * @param[in] action_flags > > + * Holds the actions detected until now. > > + * @param[in] action > > + * Pointer to the decap action. > > + * @param[in] attr > > + * Pointer to flow attributes > > + * @param[out] error > > + * Pointer to error structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. 
> > + */ > > +static int > > +flow_dv_validate_action_vxlan_decap(uint64_t action_flags, > > + const struct rte_flow_action *action __rte_unused, > > + const struct rte_flow_attr *attr, > > + struct rte_flow_error *error) > > +{ > > + if (action_flags & MLX5_FLOW_ACTION_DROP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can't drop and decap in same > flow"); > > + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can't encap and decap in same > > flow"); > > + if (attr->egress) > > + return rte_flow_error_set(error, ENOTSUP, > > + > > RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, > > + NULL, > > + "decap action not supported for " > > + "egress"); > > Why not EINVAL? > > > + return 0; > > +} > > + > > +static size_t item_len[] = { > > + [RTE_FLOW_ITEM_TYPE_VOID] = 0, > > + [RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth), > > + [RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan), > > + [RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4), > > + [RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6), > > + [RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp), > > + [RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp), > > + [RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct > rte_flow_item_vxlan), > > + [RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre), > > + [RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct > rte_flow_item_gre), > > + [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct > > rte_flow_item_vxlan_gpe), > > + [RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls), > }; > > + > > +/** > > + * Convert the encap action data from rte_flow_item to raw buffer > > + * > > + * @param[in] item > > + * Pointer to rte_flow_item object. > > + * @param[out] buf > > + * Pointer to the output buffer. > > + * @param[out] size > > + * Pointer to the output buffer size. 
> > + * @param[out] error > > + * Pointer to the error structure. > > + * @param[in] l3_type > > + * ???. > > What is ??? > > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. > > + */ > > +static int > > +flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t > *size, > > + struct rte_flow_error *error, uint16_t l3_type) > > What is the l3_type used for? > > > +{ > > + struct ether_hdr *eth = NULL; > > + struct vlan_hdr *vlan = NULL; > > + struct ipv4_hdr *ipv4 = NULL; > > + struct ipv6_hdr *ipv6 = NULL; > > + struct udp_hdr *udp = NULL; > > + struct vxlan_hdr *vxlan = NULL; > > + const struct rte_flow_item_vlan *vlan_spec; > > Why vlan has dedicated variable? > > > + size_t len; > > + > > + assert(item); > > Why assert on the item? It should be valid and if not return error. > > > + *size = 0; > > Why not use temp_size and avoid memory access? > > > + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { > > + /* TODO: variable length handling: raw, vxlan and nvgre. */ > > Why TODO? > > > + len = item_len[item->type]; > > This can result in crash if type is not the one you expected. > > > + if (len + *size > MLX5_ENCAP_LEN) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "invalid item length"); > > + rte_memcpy((void *)&buf[*size], item->spec, len); > > + switch (item->type) { > > + case RTE_FLOW_ITEM_TYPE_ETH: > > + eth = (void *)&buf[*size]; > > Why cast to void? Same for all others. 
> > > + break; > > + case RTE_FLOW_ITEM_TYPE_VLAN: > > + vlan_spec = item->spec; > > + vlan = (void *)&buf[*size]; > > + if (!eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "eth header not found"); > > + vlan->vlan_tci = vlan_spec->tci; > > + vlan->eth_proto = vlan_spec->inner_type; > > + if (!eth->ether_type) > > + eth->ether_type = > htons(ETHER_TYPE_VLAN); > > Why not use rte_cpu_to_be? Same for all. > > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV4: > > + ipv4 = (void *)&buf[*size]; > > + if (!vlan && !eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "neither eth nor vlan header > > found"); > > + if (vlan && !vlan->eth_proto) > > + vlan->eth_proto = htons(ETHER_TYPE_IPv4); > > + else if (eth && !eth->ether_type) > > + eth->ether_type = htons(ETHER_TYPE_IPv4); > > + if (!ipv4->version_ihl) > > + ipv4->version_ihl = 0x45; > > + if (!ipv4->time_to_live) > > + ipv4->time_to_live = 0x40; > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV6: > > + ipv6 = (void *)&buf[*size]; > > + if (!vlan && !eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "neither eth nor vlan header > > found"); > > + if (vlan && !vlan->eth_proto) > > + vlan->eth_proto = htons(ETHER_TYPE_IPv6); > > + else if (eth && !eth->ether_type) > > + eth->ether_type = htons(ETHER_TYPE_IPv6); > > + if (!ipv6->vtc_flow) > > + ipv6->vtc_flow = htonl(0x60000000); > > + if (!ipv6->hop_limits) > > + ipv6->hop_limits = 0xff; > > + break; > > + case RTE_FLOW_ITEM_TYPE_UDP: > > + udp = (void *)&buf[*size]; > > + if (!ipv4 && !ipv6) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "ip header not found"); > > + if (ipv4 && !ipv4->next_proto_id) > > + ipv4->next_proto_id = MLX5_UDP; > > + else if (ipv6 && !ipv6->proto) > > + ipv6->proto = 
MLX5_UDP; > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN: > > + vxlan = (void *)&buf[*size]; > > + if (!udp) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "udp header not found"); > > + if (!udp->dst_port) > > + udp->dst_port = > > htons(MLX5_UDP_PORT_VXLAN); > > + if (!vxlan->vx_flags) > > + vxlan->vx_flags = htonl(0x08000000); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: > > + vxlan = (void *)&buf[*size]; > > + if (!udp) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "udp header not found"); > > + if (!udp->dst_port) > > + udp->dst_port = > > htons(MLX5_UDP_PORT_VXLAN_GPE); > > + if (!vxlan->vx_flags) > > + vxlan->vx_flags = htonl(0x0c000003); > > + break; > > + case RTE_FLOW_ITEM_TYPE_GRE: > > + case RTE_FLOW_ITEM_TYPE_NVGRE: > > + if (!ipv4 && !ipv6) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "ip header not found"); > > + if (ipv4 && !ipv4->next_proto_id) > > + ipv4->next_proto_id = htons(MLX5_GRE); > > + else if (ipv6 && !ipv6->proto) > > + ipv6->proto = htons(MLX5_GRE); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VOID: > > + break; > > + default: > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "unsupported item type"); > > + break; > > + } > > + *size += len; > > + } > > + if (l3_type && vlan) > > + vlan->eth_proto = htons(l3_type); > > + else if (l3_type && eth) > > + eth->ether_type = htons(l3_type); > > + return 0; > > +} > > + > > +/** > > + * Convert VXLAN encap action to DV specification. > > + * > > + * @param[in] dev > > + * Pointer to rte_eth_dev structure. > > + * @param[in] action > > + * Pointer to action structure. > > + * @param[out] error > > + * Pointer to the error structure. 
> > + * > > + * @return > > + * Pointer to action on success, NULL otherwise and rte_errno is set. > > + */ > > +static struct ibv_flow_action * > > +flow_dv_create_vxlan_encap(struct rte_eth_dev *dev, > > + const struct rte_flow_action *action, > > + struct rte_flow_error *error) > > +{ > > + struct ibv_flow_action *encap_verb = NULL; > > + const struct rte_flow_action_vxlan_encap *encap_data; > > + struct priv *priv = dev->data->dev_private; > > + uint8_t buf[MLX5_ENCAP_LEN]; > > + size_t size = 0; > > + int convert_result; > > + > > + encap_data = (const struct rte_flow_action_vxlan_encap *)action- > > >conf; > > + convert_result = flow_dv_convert_encap(encap_data->definition, > > + buf, &size, error, 0); > > + if (convert_result) > > + return NULL; > > + encap_verb = mlx5_glue->dv_create_flow_action_packet_reformat > > + (priv->ctx, size, (size ? buf : NULL), > > + > > MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL, > > + MLX5DV_FLOW_TABLE_TYPE_NIC_TX); > > + if (!encap_verb) > > + rte_flow_error_set(error, EINVAL, > > RTE_FLOW_ERROR_TYPE_ACTION, > > + NULL, "cannot create vxlan encap action"); > > + return encap_verb; > > +} > > + > > +/** > > + * Convert VXLAN decap action to DV specification. > > + * > > + * @param[in] dev > > + * Pointer to rte_eth_dev structure. > > + * @param[in] action > > + * Pointer to action structure. > > + * @param[out] error > > + * Pointer to the error structure. > > + * > > + * @return > > + * Pointer to action on success, NULL otherwise and rte_errno is set. 
> > + */ > > +static struct ibv_flow_action * > > +flow_dv_create_vxlan_decap(struct rte_eth_dev *dev, > > + const struct rte_flow_action *action __rte_unused, > > + struct rte_flow_error *error) > > +{ > > + struct ibv_flow_action *decap_verb = NULL; > > + struct priv *priv = dev->data->dev_private; > > + > > + decap_verb = mlx5_glue->dv_create_flow_action_packet_reformat > > + (priv->ctx, 0, NULL, > > + > > MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2, > > + MLX5DV_FLOW_TABLE_TYPE_NIC_RX); > > + if (!decap_verb) > > + rte_flow_error_set(error, EINVAL, > > RTE_FLOW_ERROR_TYPE_ACTION, > > + NULL, "cannot create decap action"); > > + return decap_verb; > > +} > > + > > +/** > > * Verify the @p attributes will be correctly understood by the NIC and > store > > * them in the @p flow if everything is correct. > > * > > @@ -347,6 +682,24 @@ > > action_flags |= MLX5_FLOW_ACTION_COUNT; > > ++actions_n; > > break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: > > + ret = > > flow_dv_validate_action_vxlan_encap(action_flags, > > + actions, attr, > > + error); > > + if (ret < 0) > > + return ret; > > + action_flags |= > MLX5_FLOW_ACTION_VXLAN_ENCAP; > > + ++actions_n; > > + break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: > > + ret = > > flow_dv_validate_action_vxlan_decap(action_flags, > > + actions, attr, > > + error); > > + if (ret < 0) > > + return ret; > > + action_flags |= > MLX5_FLOW_ACTION_VXLAN_DECAP; > > + ++actions_n; > > + break; > > default: > > return rte_flow_error_set(error, ENOTSUP, > > > > RTE_FLOW_ERROR_TYPE_ACTION, > > @@ -1056,14 +1409,23 @@ > > /** > > * Store the requested actions in an array. > > * > > + * @param[in] dev > > + * Pointer to rte_eth_dev structure. > > * @param[in] action > > * Flow action to translate. > > * @param[in, out] dev_flow > > * Pointer to the mlx5_flow. > > + * @param[out] error > > + * Pointer to the error structure. 
> > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. > > */ > > -static void > > -flow_dv_create_action(const struct rte_flow_action *action, > > - struct mlx5_flow *dev_flow) > > +static int > > +flow_dv_create_action(struct rte_eth_dev *dev, > > + const struct rte_flow_action *action, > > + struct mlx5_flow *dev_flow, > > + struct rte_flow_error *error) > > { > > const struct rte_flow_action_queue *queue; > > const struct rte_flow_action_rss *rss; @@ -1110,10 +1472,35 @@ > > /* Added to array only in apply since we need the QP */ > > flow->actions |= MLX5_FLOW_ACTION_RSS; > > break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: > > + dev_flow->dv.actions[actions_n].type = > > + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; > > + dev_flow->dv.actions[actions_n].action = > > + flow_dv_create_vxlan_encap(dev, action, > > error); > > + if (!(dev_flow->dv.actions[actions_n].action)) > > + return -rte_errno; > > + dev_flow->dv.encap_verb = > > + dev_flow->dv.actions[actions_n].action; > > + flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP; > > + actions_n++; > > + break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: > > + dev_flow->dv.actions[actions_n].type = > > + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; > > + dev_flow->dv.actions[actions_n].action = > > + flow_dv_create_vxlan_decap(dev, action, > > error); > > + if (!(dev_flow->dv.actions[actions_n].action)) > > + return -rte_errno; > > + dev_flow->dv.decap_verb = > > + dev_flow->dv.actions[actions_n].action; > > + flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP; > > + actions_n++; > > + break; > > default: > > break; > > } > > dev_flow->dv.actions_n = actions_n; > > + return 0; > > } > > > > static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; > > @@ -1279,8 +1666,10 @@ > > matcher.egress = attr->egress; > > if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) > > return -rte_errno; > > - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) > > - 
flow_dv_create_action(actions, dev_flow); > > + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { > > + if (flow_dv_create_action(dev, actions, dev_flow, error)) > > + return -rte_errno; > > + } > > return 0; > > } > > > > @@ -1465,6 +1854,16 @@ > > LIST_REMOVE(dev_flow, next); > > if (dev_flow->dv.matcher) > > flow_dv_matcher_release(dev, dev_flow); > > + if (dev_flow->dv.encap_verb) { > > + claim_zero(mlx5_glue->destroy_flow_action > > + (dev_flow->dv.encap_verb)); > > + dev_flow->dv.encap_verb = NULL; > > + } > > + if (dev_flow->dv.decap_verb) { > > + claim_zero(mlx5_glue->destroy_flow_action > > + (dev_flow->dv.decap_verb)); > > + dev_flow->dv.decap_verb = NULL; > > + } > > rte_free(dev_flow); > > } > > } > > -- > > 1.8.3.1 > > > Thanks, > Ori