> -----Original Message-----
> From: netdev-ow...@vger.kernel.org <netdev-ow...@vger.kernel.org> On
> Behalf Of Saeed Mahameed
> Sent: Tuesday, June 18, 2019 12:53 AM
> To: Saeed Mahameed <sae...@mellanox.com>; Leon Romanovsky
> <leo...@mellanox.com>
> Cc: netdev@vger.kernel.org; linux-r...@vger.kernel.org; Jianbo Liu
> <jian...@mellanox.com>; Eli Britstein <el...@mellanox.com>; Roi Dayan
> <r...@mellanox.com>; Mark Bloch <ma...@mellanox.com>
> Subject: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport
> number in VF vports and uplink ingress ACLs
> 
> From: Jianbo Liu <jian...@mellanox.com>
> 
> When a dual-port VHCA sends a RoCE packet on its non-native port, and the
> packet arrives to its affiliated vport FDB, a mismatch might occur on the 
> rules
> that match the packet source vport as it is not represented by single VHCA 
> only
> in this case. So we change to match on metadata instead of source vport.
> To do that, a rule is created in all vports and uplink ingress ACLs, to save 
> the
> source vport number and vhca id in the packet's metadata in order to match on
> it later.
> The metadata register used is the first of the 32-bit type C registers. It 
> can be
> used for matching and header modify operations. The higher 16 bits of this
> register are for vhca id, and the lower 16 ones is for vport number.
> This change is not for dual-port RoCE only. If HW and FW allow, the vport
> metadata matching is enabled by default.
> 
> Signed-off-by: Jianbo Liu <jian...@mellanox.com>
> Reviewed-by: Eli Britstein <el...@mellanox.com>
> Reviewed-by: Roi Dayan <r...@mellanox.com>
> Reviewed-by: Mark Bloch <ma...@mellanox.com>
> Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/eswitch.c |   2 +
>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |   9 +
>  .../mellanox/mlx5/core/eswitch_offloads.c     | 183 ++++++++++++++----
>  include/linux/mlx5/eswitch.h                  |   3 +
>  4 files changed, 161 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> index a42a23e505df..1235fd84ae3a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct
> mlx5_eswitch *esw,
> 
>       vport->ingress.drop_rule = NULL;
>       vport->ingress.allow_rule = NULL;
> +
> +     esw_vport_del_ingress_acl_modify_metadata(esw, vport);
>  }
> 
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 8b9f2cf58e91..4417a195832e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -68,6 +68,8 @@ struct vport_ingress {
>       struct mlx5_flow_group *allow_spoofchk_only_grp;
>       struct mlx5_flow_group *allow_untagged_only_grp;
>       struct mlx5_flow_group *drop_grp;
> +     int                      modify_metadata_id;
No need for random alignment. Just have one white space after int.

> +     struct mlx5_flow_handle  *modify_metadata_rule;
>       struct mlx5_flow_handle  *allow_rule;
>       struct mlx5_flow_handle  *drop_rule;
>       struct mlx5_fc           *drop_counter;
> @@ -196,6 +198,10 @@ struct mlx5_esw_functions {
>       u16                     num_vfs;
>  };
> 
> +enum {
> +     MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), };
> +
>  struct mlx5_eswitch {
>       struct mlx5_core_dev    *dev;
>       struct mlx5_nb          nb;
> @@ -203,6 +209,7 @@ struct mlx5_eswitch {
>       struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
>       struct workqueue_struct *work_queue;
>       struct mlx5_vport       *vports;
> +     u32                     flags;
Same as above, no need for extra aligment.

>       int                     total_vports;
>       int                     enabled_vports;
>       /* Synchronize between vport change events @@ -240,6 +247,8 @@
> void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
>                                 struct mlx5_vport *vport);
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
>                                  struct mlx5_vport *vport);
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +                                            struct mlx5_vport *vport);
> 
>  /* E-Switch API */
>  int mlx5_eswitch_init(struct mlx5_core_dev *dev); diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 17abb98b48af..871ae44dc132 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct
> mlx5_eswitch *esw)  static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>                                            struct mlx5_vport *vport)
>  {
> -     struct mlx5_core_dev *dev = esw->dev;
>       struct mlx5_flow_act flow_act = {0};
>       struct mlx5_flow_spec *spec;
>       int err = 0;
> 
>       /* For prio tag mode, there is only 1 FTEs:
> -      * 1) Untagged packets - push prio tag VLAN, allow
> +      * 1) Untagged packets - push prio tag VLAN and modify metadata if
> +      * required, allow
>        * Unmatched traffic is allowed by default
>        */
> 
> -     if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
> -             return -EOPNOTSUPP;
> -
> -     esw_vport_cleanup_ingress_rules(esw, vport);
> -
> -     err = esw_vport_enable_ingress_acl(esw, vport);
> -     if (err) {
> -             mlx5_core_warn(esw->dev,
> -                            "failed to enable prio tag ingress acl (%d) on
> vport[%d]\n",
> -                            err, vport->vport);
> -             return err;
> -     }
> -
> -     esw_debug(esw->dev,
> -               "vport[%d] configure ingress rules\n", vport->vport);
> -
>       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
>       if (!spec) {
>               err = -ENOMEM;
> @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>       flow_act.vlan[0].ethtype = ETH_P_8021Q;
>       flow_act.vlan[0].vid = 0;
>       flow_act.vlan[0].prio = 0;
> +
> +     if (vport->ingress.modify_metadata_rule) {
> +             flow_act.action |=
> MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
> +             flow_act.modify_id = vport->ingress.modify_metadata_id;
> +     }
> +
>       vport->ingress.allow_rule =
>               mlx5_add_flow_rules(vport->ingress.acl, spec,
>                                   &flow_act, NULL, 0);
> @@ -1616,6 +1606,59 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>       return err;
>  }
> 
> +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch
> *esw,
> +                                                  struct mlx5_vport *vport)
> +{
> +     u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] =
> {};
> +     struct mlx5_flow_act flow_act = {};
> +     struct mlx5_flow_spec spec = {};
> +     int err = 0;
> +
> +     MLX5_SET(set_action_in, action, action_type,
> MLX5_ACTION_TYPE_SET);
> +     MLX5_SET(set_action_in, action, field,
> MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
> +     MLX5_SET(set_action_in, action, data,
> +              mlx5_eswitch_get_vport_metadata_for_match(esw, vport-
> >vport));
> +
> +     err = mlx5_modify_header_alloc(esw->dev,
> MLX5_FLOW_NAMESPACE_ESW_INGRESS,
> +                                    1, action, &vport-
> >ingress.modify_metadata_id);
> +
> +     if (err) {
> +             esw_warn(esw->dev,
> +                      "failed to alloc modify header for vport %d ingress acl
> (%d)\n",
> +                      vport->vport, err);
> +             return err;
> +     }
> +
> +     flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
> MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> +     flow_act.modify_id = vport->ingress.modify_metadata_id;
> +     vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport-
> >ingress.acl,
> +                                                               &spec,
> &flow_act, NULL, 0);
> +     if (IS_ERR(vport->ingress.modify_metadata_rule)) {
> +             err = PTR_ERR(vport->ingress.modify_metadata_rule);
> +             esw_warn(esw->dev,
> +                      "failed to add setting metadata rule for vport %d
> ingress acl, err(%d)\n",
> +                      vport->vport, err);
> +             vport->ingress.modify_metadata_rule = NULL;
> +             goto out;
> +     }
> +
> +out:
> +     if (err)
> +             mlx5_modify_header_dealloc(esw->dev, vport-
> >ingress.modify_metadata_id);
> +     return err;
> +}
> +
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +                                            struct mlx5_vport *vport)
> +{
> +     if (vport->ingress.modify_metadata_rule) {
> +             mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
> +             mlx5_modify_header_dealloc(esw->dev,
> +vport->ingress.modify_metadata_id);
> +
> +             vport->ingress.modify_metadata_rule = NULL;
> +     }
> +}
> +
>  static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>                                           struct mlx5_vport *vport)
>  {
> @@ -1623,6 +1666,9 @@ static int esw_vport_egress_prio_tag_config(struct
> mlx5_eswitch *esw,
>       struct mlx5_flow_spec *spec;
>       int err = 0;
> 
> +     if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +             return 0;
> +
>       /* For prio tag mode, there is only 1 FTEs:
>        * 1) prio tag packets - pop the prio tag VLAN, allow
>        * Unmatched traffic is allowed by default @@ -1676,27 +1722,77 @@
> static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>       return err;
>  }
> 
> -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
> +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
> +                                        struct mlx5_vport *vport)
>  {
> -     struct mlx5_vport *vport = NULL;
> -     int i, j;
>       int err;
> 
> -     mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
> +     if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
> +         !MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +             return 0;
> +
> +     esw_vport_cleanup_ingress_rules(esw, vport);
> +
> +     err = esw_vport_enable_ingress_acl(esw, vport);
> +     if (err) {
> +             esw_warn(esw->dev,
> +                      "failed to enable ingress acl (%d) on vport[%d]\n",
> +                      err, vport->vport);
> +             return err;
> +     }
> +
> +     esw_debug(esw->dev,
> +               "vport[%d] configure ingress rules\n", vport->vport);
> +
> +     if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
> +             err = esw_vport_add_ingress_acl_modify_metadata(esw,
> vport);
> +             if (err)
> +                     goto out;
> +     }
> +
> +     if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
> +         (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +          vport->vport <= esw->dev->priv.sriov.num_vfs)) {
>               err = esw_vport_ingress_prio_tag_config(esw, vport);
>               if (err)
> -                     goto err_ingress;
> -             err = esw_vport_egress_prio_tag_config(esw, vport);
> +                     goto out;
> +     }
> +
> +out:
> +     if (err)
> +             esw_vport_disable_ingress_acl(esw, vport);
> +     return err;
> +}
> +
> +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) {
> +     struct mlx5_vport *vport;
> +     int i, j;
> +     int err;
> +
> +     mlx5_esw_for_all_vports(esw, i, vport) {
> +             err = esw_vport_ingress_common_config(esw, vport);
>               if (err)
> -                     goto err_egress;
> +                     goto err_ingress;
> +
> +             if (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +                 vport->vport <= esw->dev->priv.sriov.num_vfs) {
Add an helper API mlx5_esw_is_vport(const struct mlx5_esw *esw, const struct 
mlx5_vport *vport) 
and use at two places in ingress and egress config.

> +                     err = esw_vport_egress_prio_tag_config(esw, vport);
> +                     if (err)
> +                             goto err_egress;
> +             }
>       }
> 
> +     if (mlx5_eswitch_vport_match_metadata_enabled(esw))
> +             esw_info(esw->dev, "Use metadata reg_c as source vport to
> match\n");
> +
>       return 0;
> 
>  err_egress:
>       esw_vport_disable_ingress_acl(esw, vport);
>  err_ingress:
> -     mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
> +     for (j = MLX5_VPORT_PF; j < i; j++) {
Keep the reverse order as before.

> +             vport = &esw->vports[j];
>               esw_vport_disable_egress_acl(esw, vport);
>               esw_vport_disable_ingress_acl(esw, vport);
>       }
> @@ -1704,15 +1800,17 @@ static int esw_prio_tag_acls_config(struct
> mlx5_eswitch *esw, int nvports)
>       return err;
>  }
> 
> -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
> +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
>  {
>       struct mlx5_vport *vport;
>       int i;
> 
> -     mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) {
> +     mlx5_esw_for_all_vports(esw, i, vport) {
If you are changing this, please do in reverse order to keep it exact mirror of 
create/enable sequence.

>               esw_vport_disable_egress_acl(esw, vport);
>               esw_vport_disable_ingress_acl(esw, vport);
>       }
> +
> +     esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
>  }
> 
>  static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
> @@ -1722,15 +1820,13 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
>       memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
>       mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
> 
> -     if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
> -             err = esw_prio_tag_acls_config(esw, nvports);
> -             if (err)
> -                     return err;
> -     }
> +     err = esw_create_offloads_acl_tables(esw);
> +     if (err)
> +             return err;
> 
>       err = esw_create_offloads_fdb_tables(esw, nvports);
>       if (err)
> -             return err;
> +             goto create_fdb_err;
> 
>       err = esw_create_offloads_table(esw, nvports);
>       if (err)
> @@ -1748,6 +1844,9 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
>  create_ft_err:
>       esw_destroy_offloads_fdb_tables(esw);
> 
> +create_fdb_err:
> +     esw_destroy_offloads_acl_tables(esw);
> +
>       return err;
>  }
> 
> @@ -1756,8 +1855,7 @@ static void esw_offloads_steering_cleanup(struct
> mlx5_eswitch *esw)
>       esw_destroy_vport_rx_group(esw);
>       esw_destroy_offloads_table(esw);
>       esw_destroy_offloads_fdb_tables(esw);
> -     if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
> -             esw_prio_tag_acls_cleanup(esw);
> +     esw_destroy_offloads_acl_tables(esw);
>  }
> 
>  static void esw_functions_changed_event_handler(struct work_struct *work)
> @@ -2290,3 +2388,16 @@ struct mlx5_eswitch_rep
> *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
>       return mlx5_eswitch_get_rep(esw, vport);  }
> EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
> +
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> *esw)
> +{
> +     return esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA;
> +}
> +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
> +
Return type should book and const *esw.

> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +                                           u16 vport)
> +{
> +     return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport; }
> +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
This one too.

> diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index
> 174eec0871d9..d729f5e4d70a 100644
> --- a/include/linux/mlx5/eswitch.h
> +++ b/include/linux/mlx5/eswitch.h
> @@ -64,6 +64,9 @@ struct mlx5_flow_handle *
> mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
>                                   int vport, u32 sqn);
> 
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> +*esw);
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +u16 vport);
> +
>  #ifdef CONFIG_MLX5_ESWITCH
>  enum devlink_eswitch_encap_mode
>  mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
> --
> 2.21.0

Reply via email to