[PATCH V1 00/11] Support resource sharing among ibv_devices

2022-12-19 Thread Erez Shitrit
Add the option to use resources (tables, matchers, actions, etc.)
from one gvmi (AKA ibv_context) on other gvmis.

When a specific gvmi allows others to use its resources, the steering
objects will be created and used on that gvmi.

This is done via alias objects that map local resources to
shared/remote resources, in order to allow sharing between several
ports.
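
As a rough usage sketch (error handling omitted; mlx5dr_context_open()
is assumed to be the existing entry point, and shared_ibv_ctx is the
attribute added later in this series):

    struct mlx5dr_context_attr attr = {0};
    struct mlx5dr_context *ctx;

    attr.queues = 16;
    attr.queue_size = 256;
    /* All objects except FT/TIR will be created on this ibv_context */
    attr.shared_ibv_ctx = shared_ibv_ctx;

    ctx = mlx5dr_context_open(local_ibv_ctx, &attr);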

Erez Shitrit (6):
  mailmap: add new contributors to the list
  net/mlx5/hws: add capabilities fields for vhca access
  net/mlx5/hws: add vhca identifier ID to the caps
  net/mlx5/hws: support shared ibv-context with local one
  net/mlx5/hws: support actions while shared resources is used
  net/mlx5/hws: add debug details for cross gvmi

Yevgeny Kliteynik (5):
  net/mlx5/hws: remove wrong PRM capability macros
  net/mlx5/hws: add PRM definitions for cross-vhca capabilities
  net/mlx5/hws: read cross-vhca capabilities
  net/mlx5/hws: added allow-other-vhca-access command
  net/mlx5/hws: added command to create alias objects

 .mailmap   |   1 +
 drivers/common/mlx5/mlx5_prm.h |  83 --
 drivers/net/mlx5/hws/mlx5dr.h  |   7 +-
 drivers/net/mlx5/hws/mlx5dr_action.c   |  69 +++-
 drivers/net/mlx5/hws/mlx5dr_action.h   |   3 +
 drivers/net/mlx5/hws/mlx5dr_cmd.c  | 118 -
 drivers/net/mlx5/hws/mlx5dr_cmd.h  |  25 +++
 drivers/net/mlx5/hws/mlx5dr_context.c  |  34 +++-
 drivers/net/mlx5/hws/mlx5dr_context.h  |  22 +++
 drivers/net/mlx5/hws/mlx5dr_debug.c|  24 ++-
 drivers/net/mlx5/hws/mlx5dr_internal.h |   1 +
 drivers/net/mlx5/hws/mlx5dr_matcher.c  | 218 +++--
 drivers/net/mlx5/hws/mlx5dr_matcher.h  |   9 +
 drivers/net/mlx5/hws/mlx5dr_table.c| 191 +-
 drivers/net/mlx5/hws/mlx5dr_table.h|   4 +-
 drivers/net/mlx5/mlx5_devx.c   |   2 +-
 16 files changed, 750 insertions(+), 61 deletions(-)

-- 
2.18.2



[PATCH V1 01/11] mailmap: add new contributors to the list

2022-12-19 Thread Erez Shitrit


Added Yevgeny Kliteynik to the list

Signed-off-by: Erez Shitrit 
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 75884b6fe2..c6f34d18a6 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1495,6 +1495,7 @@ Yash Sharma 
 Yasufumi Ogawa  
 Yelena Krivosheev 
 Yerden Zhumabekov  
+Yevgeny Kliteynik 
 Yicai Lu 
 Yiding Zhou 
 Yi Li 
-- 
2.18.2



[PATCH V1 02/11] net/mlx5/hws: add capabilities fields for vhca access

2022-12-19 Thread Erez Shitrit


The new fields define the ability to access from one vhca to another.

Signed-off-by: Erez Shitrit 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 2b5c43ee6e..19c00ad913 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2115,8 +2115,10 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 log_conn_track_max_alloc[0x5];
u8 reserved_at_d8[0x3];
u8 log_max_conn_track_offload[0x5];
-   u8 reserved_at_e0[0x20]; /* End of DW7. */
-   u8 reserved_at_100[0x60];
+   u8 cross_vhca_object_to_object_supported[0x20]; /* End of DW7. */
+   u8 allowed_object_for_other_vhca_access_high[0x20];
+   u8 allowed_object_for_other_vhca_access[0x20];
+   u8 reserved_at_140[0x20];
u8 reserved_at_160[0x3];
u8 hairpin_sq_wqe_bb_size[0x5];
u8 hairpin_sq_wq_in_host_mem[0x1];
-- 
2.18.2



[PATCH V1 03/11] net/mlx5/hws: remove wrong PRM capability macros

2022-12-19 Thread Erez Shitrit
From: Yevgeny Kliteynik 


Removing macros for wrong capability checks.

Signed-off-by: Yevgeny Kliteynik 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 19c00ad913..3d9d69d9cf 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1332,12 +1332,6 @@ enum {
(1ULL << MLX5_GENERAL_OBJ_TYPE_GENEVE_TLV_OPT)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \
(1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD)
-#define MLX5_GENERAL_OBJ_TYPES_CAP_RTC \
-   (1ULL << MLX5_GENERAL_OBJ_TYPE_RTC)
-#define MLX5_GENERAL_OBJ_TYPES_CAP_STC \
-   (1ULL << MLX5_GENERAL_OBJ_TYPE_STC)
-#define MLX5_GENERAL_OBJ_TYPES_CAP_STE \
-   (1ULL << MLX5_GENERAL_OBJ_TYPE_STE)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_DEFINER \
(1ULL << MLX5_GENERAL_OBJ_TYPE_DEFINER)
 #define MLX5_GENERAL_OBJ_TYPES_CAP_DEK \
-- 
2.18.2



[PATCH V1 05/11] net/mlx5/hws: read cross-vhca capabilities

2022-12-19 Thread Erez Shitrit
From: Yevgeny Kliteynik 


And keep them for future processing.
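
The combined caps->cross_vhca_resources flag is set only when every
required object type is supported, so a consumer can bail out early.
A minimal sketch (the surrounding function is illustrative):

    if (!caps->cross_vhca_resources) {
        DR_LOG(ERR, "Cross-VHCA resource sharing is not supported");
        rte_errno = ENOTSUP;
        return rte_errno;
    }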

Signed-off-by: Yevgeny Kliteynik 
Reviewed-by: Alex Vesker 
---
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 24 
 drivers/net/mlx5/hws/mlx5dr_cmd.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c
index 721376b8da..2156fd6643 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.c
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c
@@ -723,6 +723,7 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
uint32_t in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0};
const struct flow_hw_port_info *port_info;
struct ibv_device_attr_ex attr_ex;
+   u32 res;
int ret;
 
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
@@ -798,6 +799,23 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
 capability.cmd_hca_cap_2.
 format_select_dw_gtpu_first_ext_dw_0);
 
+   /* check cross-VHCA support in cap2 */
+   res =
+   MLX5_GET(query_hca_cap_out, out,
+   capability.cmd_hca_cap_2.cross_vhca_object_to_object_supported);
+
+   caps->cross_vhca_resources = (res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_TIR) &&
+(res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_FT) &&
+(res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_RTC);
+
+   res =
+   MLX5_GET(query_hca_cap_out, out,
+   capability.cmd_hca_cap_2.allowed_object_for_other_vhca_access);
+
+   caps->cross_vhca_resources &= (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_TIR) &&
+ (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_FT) &&
+ (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_RTC);
+
MLX5_SET(query_hca_cap_in, in, op_mod,
 MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE |
 MLX5_HCA_CAP_OPMOD_GET_CUR);
@@ -817,6 +835,12 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
capability.flow_table_nic_cap.

flow_table_properties_nic_receive.reparse);
 
+   /* check cross-VHCA support in flow table properties */
+   res =
+   MLX5_GET(query_hca_cap_out, out,
+   capability.flow_table_nic_cap.flow_table_properties_nic_receive.cross_vhca_object);
+   caps->cross_vhca_resources &= res;
+
if (caps->wqe_based_update) {
MLX5_SET(query_hca_cap_in, in, op_mod,
 MLX5_GET_HCA_CAP_OP_MOD_WQE_BASED_FLOW_TABLE |
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h
index 2b3b47f473..ab61e27fd8 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.h
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h
@@ -163,6 +163,7 @@ struct mlx5dr_cmd_query_caps {
uint8_t sq_ts_format;
uint64_t definer_format_sup;
uint32_t trivial_match_definer;
+   bool cross_vhca_resources;
char fw_ver[64];
 };
 
-- 
2.18.2



[PATCH V1 04/11] net/mlx5/hws: add PRM definitions for cross-vhca capabilities

2022-12-19 Thread Erez Shitrit
From: Yevgeny Kliteynik 


Add PRM definitions for each of the new cross-vhca capability bits and
for the alias object types.

Signed-off-by: Yevgeny Kliteynik 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 3d9d69d9cf..dfa25c2b49 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1930,7 +1930,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 metadata_reg_a_width[0x8];
u8 reserved_at_60[0xa];
u8 reparse[0x1];
-   u8 reserved_at_6b[0xd];
+   u8 reserved_at_6b[0x1];
+   u8 cross_vhca_object[0x1];
+   u8 reserved_at_6d[0xb];
u8 log_max_ft_num[0x8];
u8 reserved_at_80[0x10];
u8 log_max_flow_counter[0x8];
@@ -2084,6 +2086,19 @@ struct mlx5_ifc_flow_table_esw_cap_bits {
u8 reserved_at_C00[0x7400];
 };
 
+enum mlx5_ifc_cross_vhca_object_to_object_supported_types {
+   MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_TIR = 1 << 10,
+   MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_FT = 1 << 11,
+   MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_FT = 1 << 12,
+   MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_RTC = 1 << 13,
+};
+
+enum mlx5_ifc_cross_vhca_allowed_objects_types {
+   MLX5_CROSS_VHCA_ALLOWED_OBJS_TIR = 1 << 0x8,
+   MLX5_CROSS_VHCA_ALLOWED_OBJS_FT = 1 << 0x9,
+   MLX5_CROSS_VHCA_ALLOWED_OBJS_RTC = 1 << 0xa,
+};
+
 /*
  *  HCA Capabilities 2
  */
@@ -3108,6 +3123,8 @@ enum {
MLX5_GENERAL_OBJ_TYPE_RTC = 0x0041,
MLX5_GENERAL_OBJ_TYPE_STE = 0x0042,
MLX5_GENERAL_OBJ_TYPE_MODIFY_HEADER_PATTERN = 0x0043,
+   MLX5_GENERAL_OBJ_TYPE_FT_ALIAS = 0xff15,
+   MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS = 0xff16,
 };
 
 struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-- 
2.18.2



[PATCH V1 06/11] net/mlx5/hws: added allow-other-vhca-access command

2022-12-19 Thread Erez Shitrit
From: Yevgeny Kliteynik 


Added FW command to allow creation of alias objects.
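
Rough usage sketch from the owning side (tir_obj, access_key and
owner_ibv_ctx are placeholders; the TIR_ALIAS type value follows the
definitions added earlier in this series):

    struct mlx5dr_cmd_allow_other_vhca_access_attr attr = {0};

    attr.obj_type = MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS;
    attr.obj_id = tir_obj->id;
    memcpy(attr.access_key, access_key, sizeof(attr.access_key));

    if (mlx5dr_cmd_allow_other_vhca_access(owner_ibv_ctx, &attr))
        return rte_errno;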

Signed-off-by: Yevgeny Kliteynik 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h| 23 +++
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 28 
 drivers/net/mlx5/hws/mlx5dr_cmd.h |  9 +
 3 files changed, 60 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index dfa25c2b49..9d36645949 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1133,6 +1133,7 @@ enum {
MLX5_CMD_SET_REGEX_REGISTERS = 0xb06,
MLX5_CMD_QUERY_REGEX_REGISTERS = 0xb07,
MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c,
+   MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16,
 };
 
 enum {
@@ -3150,6 +3151,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
u8 reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_allow_other_vhca_access_in_bits {
+   u8 opcode[0x10];
+   u8 uid[0x10];
+   u8 reserved_at_20[0x10];
+   u8 op_mod[0x10];
+   u8 reserved_at_40[0x50];
+   u8 object_type_to_be_accessed[0x10];
+   u8 object_id_to_be_accessed[0x20];
+   u8 reserved_at_c0[0x40];
+   union {
+   u8 access_key_raw[0x100];
+   u8 access_key[8][0x20];
+   };
+};
+
+struct mlx5_ifc_allow_other_vhca_access_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+   u8 syndrome[0x20];
+   u8 reserved_at_40[0x40];
+};
+
 struct mlx5_ifc_virtio_q_counters_bits {
u8 modify_field_select[0x40];
u8 reserved_at_40[0x40];
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c
index 2156fd6643..b120be2d88 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.c
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c
@@ -716,6 +716,34 @@ int mlx5dr_cmd_sq_modify_rdy(struct mlx5dr_devx_obj 
*devx_obj)
return ret;
 }
 
+int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx,
+  struct mlx5dr_cmd_allow_other_vhca_access_attr *attr)
+{
+   uint32_t out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0};
+   uint32_t in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0};
+   void *key;
+   int ret;
+
+   MLX5_SET(allow_other_vhca_access_in,
+in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS);
+   MLX5_SET(allow_other_vhca_access_in,
+in, object_type_to_be_accessed, attr->obj_type);
+   MLX5_SET(allow_other_vhca_access_in,
+in, object_id_to_be_accessed, attr->obj_id);
+
+   key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key);
+   memcpy(key, attr->access_key, sizeof(attr->access_key));
+
+   ret = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out));
+   if (ret) {
+   DR_LOG(ERR, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command");
+   rte_errno = errno;
+   return rte_errno;
+   }
+
+   return 0;
+}
+
 int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
  struct mlx5dr_cmd_query_caps *caps)
 {
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h
index ab61e27fd8..ea6ced9d27 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.h
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h
@@ -124,6 +124,12 @@ struct mlx5dr_cmd_sq_create_attr {
uint32_t ts_format;
 };
 
+struct mlx5dr_cmd_allow_other_vhca_access_attr {
+   uint16_t obj_type;
+   uint32_t obj_id;
+   uint8_t access_key[32];
+};
+
 struct mlx5dr_cmd_query_ft_caps {
uint8_t max_level;
uint8_t reparse;
@@ -230,4 +236,7 @@ void mlx5dr_cmd_set_attr_connect_miss_tbl(struct mlx5dr_context *ctx,
  uint32_t fw_ft_type,
  enum mlx5dr_table_type type,
  struct mlx5dr_cmd_ft_modify_attr *ft_attr);
+
+int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx,
+  struct mlx5dr_cmd_allow_other_vhca_access_attr *attr);
 #endif /* MLX5DR_CMD_H_ */
-- 
2.18.2



[PATCH V1 11/11] net/mlx5/hws: add debug details for cross gvmi

2022-12-19 Thread Erez Shitrit


For context, table and matcher.

Signed-off-by: Erez Shitrit 
Signed-off-by: Hamdan Igbaria 
Reviewed-by: Alex Vesker 
---
 drivers/net/mlx5/hws/mlx5dr_debug.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_debug.c b/drivers/net/mlx5/hws/mlx5dr_debug.c
index 55011c208d..239dfcae46 100644
--- a/drivers/net/mlx5/hws/mlx5dr_debug.c
+++ b/drivers/net/mlx5/hws/mlx5dr_debug.c
@@ -177,6 +177,7 @@ mlx5dr_debug_dump_matcher_attr(FILE *f, struct mlx5dr_matcher *matcher)
 
 static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher)
 {
+   bool is_shared = mlx5dr_context_shared_gvmi_used(matcher->tbl->ctx);
bool is_root = matcher->tbl->level == MLX5DR_ROOT_LEVEL;
enum mlx5dr_table_type tbl_type = matcher->tbl->type;
struct mlx5dr_devx_obj *ste_0, *ste_1 = NULL;
@@ -205,11 +206,13 @@ static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher)
ste_1 = NULL;
}
 
-   ret = fprintf(f, ",%d,%d,%d,%d",
+   ret = fprintf(f, ",%d,%d,%d,%d,%d",
  matcher->match_ste.rtc_0 ? matcher->match_ste.rtc_0->id : 0,
  ste_0 ? (int)ste_0->id : -1,
  matcher->match_ste.rtc_1 ? matcher->match_ste.rtc_1->id : 0,
- ste_1 ? (int)ste_1->id : -1);
+ ste_1 ? (int)ste_1->id : -1,
+ is_shared && !is_root ?
+ matcher->match_ste.aliased_rtc_0->id : 0);
if (ret < 0)
goto out_err;
 
@@ -253,18 +256,20 @@ static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher)
 
 static int mlx5dr_debug_dump_table(FILE *f, struct mlx5dr_table *tbl)
 {
+   bool is_shared = mlx5dr_context_shared_gvmi_used(tbl->ctx);
bool is_root = tbl->level == MLX5DR_ROOT_LEVEL;
struct mlx5dr_matcher *matcher;
int ret;
 
-   ret = fprintf(f, "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d,%d,%d,%d\n",
+   ret = fprintf(f, "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d,%d,%d,%d,%d\n",
  MLX5DR_DEBUG_RES_TYPE_TABLE,
  (uint64_t)(uintptr_t)tbl,
  (uint64_t)(uintptr_t)tbl->ctx,
  is_root ? 0 : tbl->ft->id,
  tbl->type,
  is_root ? 0 : tbl->fw_ft_type,
- tbl->level);
+ tbl->level,
+ is_shared && !is_root ? tbl->local_ft->id : 0);
if (ret < 0) {
rte_errno = EINVAL;
return rte_errno;
@@ -383,12 +388,17 @@ static int mlx5dr_debug_dump_context_attr(FILE *f, struct mlx5dr_context *ctx)
 {
int ret;
 
-   ret = fprintf(f, "%u,0x%" PRIx64 ",%d,%zu,%d\n",
+   ret = fprintf(f, "%u,0x%" PRIx64 ",%d,%zu,%d,%s,%d,%d\n",
  MLX5DR_DEBUG_RES_TYPE_CONTEXT_ATTR,
  (uint64_t)(uintptr_t)ctx,
  ctx->pd_num,
  ctx->queues,
- ctx->send_queue->num_entries);
+ ctx->send_queue->num_entries,
+ mlx5dr_context_shared_gvmi_used(ctx) ?
+ mlx5_glue->get_device_name(ctx->ibv_ctx->device) : "None",
+ ctx->caps->vhca_id,
+ mlx5dr_context_shared_gvmi_used(ctx) ?
+ ctx->caps->shared_vhca_id : 0xffff);
if (ret < 0) {
rte_errno = EINVAL;
return rte_errno;
-- 
2.18.2



[PATCH V1 07/11] net/mlx5/hws: added command to create alias objects

2022-12-19 Thread Erez Shitrit
From: Yevgeny Kliteynik 


Added support for a command that can create alias objects - objects
that are accessed across VHCA.
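
Rough usage sketch from the consuming side (identifiers are
placeholders; the access key must match the one registered with the
ALLOW_OTHER_VHCA_ACCESS command from the previous patch):

    struct mlx5dr_cmd_alias_obj_create_attr alias_attr = {0};
    struct mlx5dr_devx_obj *alias;

    alias_attr.obj_type = MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS;
    alias_attr.obj_id = remote_obj_id;   /* object on the other VHCA */
    alias_attr.vhca_id = remote_vhca_id; /* VHCA that owns the object */
    memcpy(alias_attr.access_key, access_key, sizeof(alias_attr.access_key));

    alias = mlx5dr_cmd_alias_obj_create(local_ibv_ctx, &alias_attr);
    if (!alias)
        return rte_errno;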

Signed-off-by: Yevgeny Kliteynik 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h| 23 ++--
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 44 +++
 drivers/net/mlx5/hws/mlx5dr_cmd.h | 11 
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 9d36645949..fd1ad55f1f 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -3135,9 +3135,10 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
u8 obj_id[0x20];
union {
struct {
-   u8 reserved_at_60[0x3];
+   u8 alias_object[0x1];
+   u8 reserved_at_61[0x2];
u8 log_obj_range[0x5];
-   u8 reserved_at_58[0x18];
+   u8 reserved_at_68[0x18];
};
u8 obj_offset[0x20];
};
@@ -3234,6 +3235,19 @@ struct mlx5_ifc_rtc_bits {
u8 reserved_at_180[0x280];
 };
 
+struct mlx5_ifc_alias_context_bits {
+   u8 vhca_id_to_be_accessed[0x10];
+   u8 reserved_at_10[0xd];
+   u8 status[0x3];
+   u8 object_id_to_be_accessed[0x20];
+   u8 reserved_at_40[0x40];
+   union {
+   u8 access_key_raw[0x100];
+   u8 access_key[8][0x20];
+   };
+   u8 metadata[0x80];
+};
+
 enum mlx5_ifc_stc_action_type {
MLX5_IFC_STC_ACTION_TYPE_NOP = 0x00,
MLX5_IFC_STC_ACTION_TYPE_COPY = 0x05,
@@ -3478,6 +3492,11 @@ struct mlx5_ifc_create_header_modify_pattern_in_bits {
struct mlx5_ifc_header_modify_pattern_in_bits pattern;
 };
 
+struct mlx5_ifc_create_alias_obj_in_bits {
+   struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+   struct mlx5_ifc_alias_context_bits alias_ctx;
+};
+
 enum {
MLX5_CRYPTO_KEY_SIZE_128b = 0x0,
MLX5_CRYPTO_KEY_SIZE_256b = 0x1,
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c
index b120be2d88..9b9f70c933 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.c
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c
@@ -744,6 +744,50 @@ int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx,
return 0;
 }
 
+struct mlx5dr_devx_obj *
+mlx5dr_cmd_alias_obj_create(struct ibv_context *ctx,
+   struct mlx5dr_cmd_alias_obj_create_attr *alias_attr)
+{
+   uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+   uint32_t in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0};
+   struct mlx5dr_devx_obj *devx_obj;
+   void *attr;
+   void *key;
+
+   devx_obj = simple_malloc(sizeof(*devx_obj));
+   if (!devx_obj) {
+   DR_LOG(ERR, "Failed to allocate memory for ALIAS general object");
+   rte_errno = ENOMEM;
+   return NULL;
+   }
+
+   attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr);
+   MLX5_SET(general_obj_in_cmd_hdr,
+attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+   MLX5_SET(general_obj_in_cmd_hdr,
+attr, obj_type, alias_attr->obj_type);
+   MLX5_SET(general_obj_in_cmd_hdr, attr, alias_object, 1);
+
+   attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx);
+   MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id);
+   MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id);
+
+   key = MLX5_ADDR_OF(alias_context, attr, access_key);
+   memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key));
+
+   devx_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out, sizeof(out));
+   if (!devx_obj->obj) {
+   DR_LOG(ERR, "Failed to create ALIAS OBJ");
+   simple_free(devx_obj);
+   rte_errno = errno;
+   return NULL;
+   }
+
+   devx_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+   return devx_obj;
+}
+
 int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
  struct mlx5dr_cmd_query_caps *caps)
 {
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h
index ea6ced9d27..824ca5e846 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.h
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h
@@ -46,6 +46,13 @@ struct mlx5dr_cmd_rtc_create_attr {
bool is_jumbo;
 };
 
+struct mlx5dr_cmd_alias_obj_create_attr {
+   uint32_t obj_id;
+   uint16_t vhca_id;
+   uint16_t obj_type;
+   uint8_t access_key[32];
+};
+
 struct mlx5dr_cmd_stc_create_attr {
uint8_t log_obj_range;
uint8_t table_type;
@@ -217,6 +224,10 @@ mlx5dr_cmd_header_modify_pattern_create(struct ibv_context *ctx,
uint32_t pattern_length,
uint8_t *actions);
 
+struct mlx5dr_devx_obj *
+mlx5dr_cmd_alias_obj_create(struct ibv_context *ctx,
+   struct mlx5dr_cmd_alias_obj_create_attr *alias_attr);

[PATCH V1 08/11] net/mlx5/hws: add vhca identifier ID to the caps

2022-12-19 Thread Erez Shitrit


And read it in the query_cap function

Signed-off-by: Erez Shitrit 
Reviewed-by: Alex Vesker 
---
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 3 +++
 drivers/net/mlx5/hws/mlx5dr_cmd.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c
index 9b9f70c933..d525867de5 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.c
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c
@@ -837,6 +837,9 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx,
MLX5_GET64(query_hca_cap_out, out,
   capability.cmd_hca_cap.match_definer_format_supported);
 
+   caps->vhca_id = MLX5_GET(query_hca_cap_out, out,
+capability.cmd_hca_cap.vhca_id);
+
caps->sq_ts_format = MLX5_GET(query_hca_cap_out, out,
  capability.cmd_hca_cap.sq_ts_format);
 
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h
index 824ca5e846..8b8d5d00b0 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.h
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h
@@ -176,6 +176,7 @@ struct mlx5dr_cmd_query_caps {
uint8_t sq_ts_format;
uint64_t definer_format_sup;
uint32_t trivial_match_definer;
+   uint32_t vhca_id;
bool cross_vhca_resources;
char fw_ver[64];
 };
-- 
2.18.2



[PATCH V1 09/11] net/mlx5/hws: support shared ibv-context with local one

2022-12-19 Thread Erez Shitrit


The idea is to have a shared ibv_context on which all the resources
are allocated (FT and TIR are exceptions).
Whenever a resource is created locally, an alias object to that
resource is allocated and used in the other context.
The resources are connected, according to the type of each resource,
either to the original resource or to its alias.
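
Root (verbs) objects must stay on the local device, which is why the
diff below switches those call sites to a small helper. The helper
body is not visible in the quoted hunks; presumably it looks roughly
like this (a sketch, not the authoritative implementation):

    static inline struct ibv_context *
    mlx5dr_context_get_local_ibv(struct mlx5dr_context *ctx)
    {
        if (mlx5dr_context_shared_gvmi_used(ctx))
            return ctx->local_ibv_ctx;

        return ctx->ibv_ctx;
    }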

Signed-off-by: Erez Shitrit 
Reviewed-by: Alex Vesker 
---
 drivers/common/mlx5/mlx5_prm.h |   6 +
 drivers/net/mlx5/hws/mlx5dr.h  |   2 +
 drivers/net/mlx5/hws/mlx5dr_action.c   |   9 +-
 drivers/net/mlx5/hws/mlx5dr_cmd.c  |  19 ++-
 drivers/net/mlx5/hws/mlx5dr_cmd.h  |   7 +-
 drivers/net/mlx5/hws/mlx5dr_context.c  |  34 +++-
 drivers/net/mlx5/hws/mlx5dr_context.h  |  22 +++
 drivers/net/mlx5/hws/mlx5dr_debug.c|   2 +-
 drivers/net/mlx5/hws/mlx5dr_internal.h |   1 +
 drivers/net/mlx5/hws/mlx5dr_matcher.c  | 218 +++--
 drivers/net/mlx5/hws/mlx5dr_matcher.h  |   9 +
 drivers/net/mlx5/hws/mlx5dr_table.c| 191 +-
 drivers/net/mlx5/hws/mlx5dr_table.h|   4 +-
 13 files changed, 484 insertions(+), 40 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index fd1ad55f1f..a716a33e84 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -291,6 +291,12 @@
 
 #define MAX_ACTIONS_DATA_IN_HEADER_MODIFY 512
 
+/* Alias FT id passed to the ALLOW_OTHER_VHCA_ACCESS & CREATE_GENERAL_OBJECT
+ * commands should have the following format:
+ * {table_type: 8bits, table_id: 24bits}.
+ */
+#define FT_ID_FT_TYPE_OFFSET 24
+
 /* Completion mode. */
 enum mlx5_completion_mode {
MLX5_COMP_ONLY_ERR = 0x0,
diff --git a/drivers/net/mlx5/hws/mlx5dr.h b/drivers/net/mlx5/hws/mlx5dr.h
index f8de27c615..aa36e3111f 100644
--- a/drivers/net/mlx5/hws/mlx5dr.h
+++ b/drivers/net/mlx5/hws/mlx5dr.h
@@ -96,6 +96,8 @@ struct mlx5dr_context_attr {
size_t initial_log_ste_memory; /* Currently not in use */
/* Optional PD used for allocating resources */
struct ibv_pd *pd;
+   /* Optional other ctx for resources allocation, all objects will be created on it */
+   struct ibv_context *shared_ibv_ctx;
 };
 
 struct mlx5dr_table_attr {
diff --git a/drivers/net/mlx5/hws/mlx5dr_action.c b/drivers/net/mlx5/hws/mlx5dr_action.c
index b0ae4e7693..da19c1ca7d 100644
--- a/drivers/net/mlx5/hws/mlx5dr_action.c
+++ b/drivers/net/mlx5/hws/mlx5dr_action.c
@@ -1147,6 +1147,7 @@ mlx5dr_action_create_reformat_root(struct mlx5dr_action *action,
 {
enum mlx5dv_flow_table_type ft_type = 0; /*fix compilation warn*/
uint32_t verb_reformat_type = 0;
+   struct ibv_context *ibv_ctx;
int ret;
 
/* Convert action to FT type and verbs reformat type */
@@ -1157,8 +1158,9 @@ mlx5dr_action_create_reformat_root(struct mlx5dr_action *action,
mlx5dr_action_conv_reformat_to_verbs(action->type, &verb_reformat_type);
 
/* Create the reformat type for root table */
+   ibv_ctx = mlx5dr_context_get_local_ibv(action->ctx);
action->flow_action =
-   mlx5_glue->dv_create_flow_action_packet_reformat_root(action->ctx->ibv_ctx,
+   mlx5_glue->dv_create_flow_action_packet_reformat_root(ibv_ctx,
  data_sz,
  data,
  verb_reformat_type,
@@ -1496,14 +1498,17 @@ mlx5dr_action_create_modify_header_root(struct mlx5dr_action *action,
__be64 *actions)
 {
enum mlx5dv_flow_table_type ft_type = 0;
+   struct ibv_context *local_ibv_ctx;
int ret;
 
ret = mlx5dr_action_conv_flags_to_ft_type(action->flags, &ft_type);
if (ret)
return rte_errno;
 
+   local_ibv_ctx = mlx5dr_context_get_local_ibv(action->ctx);
+
action->flow_action =
-   mlx5_glue->dv_create_flow_action_modify_header_root(action->ctx->ibv_ctx,
+   mlx5_glue->dv_create_flow_action_modify_header_root(local_ibv_ctx,
actions_sz,
(uint64_t *)actions,
ft_type);
diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c
index d525867de5..754a424bd7 100644
--- a/drivers/net/mlx5/hws/mlx5dr_cmd.c
+++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c
@@ -217,18 +217,23 @@ void mlx5dr_cmd_set_attr_connect_miss_tbl(struct mlx5dr_context *ctx,
 {
struct mlx5dr_devx_obj *default_miss_tbl;
 
-   if (type != MLX5DR_TABLE_TYPE_FDB)
+   if (type != MLX5DR_TABLE_TYPE_FDB && !mlx5dr_context_shared_gvmi_used(ctx))
return;
 
-   default_miss_tbl = ctx->common_res[type

[PATCH V1 10/11] net/mlx5/hws: support actions while shared resources is used

2022-12-19 Thread Erez Shitrit


TIR/FT actions are different in the context of a shared ibv resource:
they are created on the local ibv_context and aliased to the shared
ibv_context. Other actions should be created on the shared ibv
resource; a new flag was added to indicate where the object came from.
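
With the new parameter, creating a destination TIR action looks
roughly like this (tir_obj and the flag value are illustrative):

    struct mlx5dr_action *act;

    /* The TIR devx object was created on the local ibv_context */
    act = mlx5dr_action_create_dest_tir(ctx, tir_obj,
                                        MLX5DR_ACTION_FLAG_HWS_RX, true);
    if (!act)
        return rte_errno;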

Signed-off-by: Erez Shitrit 
Reviewed-by: Alex Vesker 
---
 drivers/net/mlx5/hws/mlx5dr.h|  5 ++-
 drivers/net/mlx5/hws/mlx5dr_action.c | 60 ++--
 drivers/net/mlx5/hws/mlx5dr_action.h |  3 ++
 drivers/net/mlx5/mlx5_devx.c |  2 +-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr.h b/drivers/net/mlx5/hws/mlx5dr.h
index aa36e3111f..c0f9a5e984 100644
--- a/drivers/net/mlx5/hws/mlx5dr.h
+++ b/drivers/net/mlx5/hws/mlx5dr.h
@@ -410,12 +410,15 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_context *ctx,
  * Direct rule TIR devx object.
  * @param[in] flags
  * Action creation flags. (enum mlx5dr_action_flags)
+ * @param[in] is_local
+ * Indicates where the TIR object was created, on the local gvmi or another gvmi.
  * @return pointer to mlx5dr_action on success NULL otherwise.
  */
 struct mlx5dr_action *
 mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx,
  struct mlx5dr_devx_obj *obj,
- uint32_t flags);
+ uint32_t flags,
+ bool is_local);
 
 /* Create direct rule TAG action.
  *
diff --git a/drivers/net/mlx5/hws/mlx5dr_action.c b/drivers/net/mlx5/hws/mlx5dr_action.c
index da19c1ca7d..2db62635c1 100644
--- a/drivers/net/mlx5/hws/mlx5dr_action.c
+++ b/drivers/net/mlx5/hws/mlx5dr_action.c
@@ -744,7 +744,10 @@ mlx5dr_action_create_dest_table(struct mlx5dr_context *ctx,
return NULL;
 
if (mlx5dr_action_is_root_flags(flags)) {
-   action->devx_obj = tbl->ft->obj;
+   if (mlx5dr_context_shared_gvmi_used(ctx))
+   action->devx_obj = tbl->local_ft->obj;
+   else
+   action->devx_obj = tbl->ft->obj;
} else {
ret = mlx5dr_action_create_stcs(action, tbl->ft);
if (ret)
@@ -758,10 +761,38 @@ mlx5dr_action_create_dest_table(struct mlx5dr_context *ctx,
return NULL;
 }
 
+static int mlx5dr_action_get_dest_tir_obj(struct mlx5dr_context *ctx,
+ struct mlx5dr_action *action,
+ struct mlx5dr_devx_obj *obj,
+ struct mlx5dr_devx_obj **ret_obj)
+{
+   int ret;
+
+   if (mlx5dr_context_shared_gvmi_used(ctx)) {
+   ret = mlx5dr_matcher_create_aliased_obj(ctx,
+   ctx->local_ibv_ctx,
+   ctx->ibv_ctx,
+   ctx->caps->vhca_id,
+   obj->id,
+   MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS,
+   &action->alias.devx_obj);
+   if (ret) {
+   DR_LOG(ERR, "Failed to create tir alias");
+   return rte_errno;
+   }
+   *ret_obj = action->alias.devx_obj;
+   } else {
+   *ret_obj = obj;
+   }
+
+   return 0;
+}
+
 struct mlx5dr_action *
 mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx,
  struct mlx5dr_devx_obj *obj,
- uint32_t flags)
+ uint32_t flags,
+ bool is_local)
 {
struct mlx5dr_action *action;
int ret;
@@ -773,6 +804,13 @@ mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx,
return NULL;
}
 
+   if (!is_local) {
+   DR_LOG(ERR, "TIR should be created on local ibv_device, flags: 0x%x",
+  flags);
+   rte_errno = ENOTSUP;
+   return NULL;
+   }
+
	action = mlx5dr_action_create_generic(ctx, flags, MLX5DR_ACTION_TYP_TIR);
if (!action)
return NULL;
@@ -780,13 +818,23 @@ mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx,
if (mlx5dr_action_is_root_flags(flags)) {
action->devx_obj = obj->obj;
} else {
-   ret = mlx5dr_action_create_stcs(action, obj);
-   if (ret)
+   struct mlx5dr_devx_obj *cur_obj = NULL; /*compilation warn*/
+
+   ret = mlx5dr_action_get_dest_tir_obj(ctx, action, obj, &cur_obj);
+   if (ret) {
+   DR_LOG(ERR, "Failed to create tir alias (flags: %d)", flags);
goto free_action;
+   }
+
+   ret = mlx5dr_action_create_stcs(action, cur_obj);
+   if (ret)
+   

RE: [PATCH v2] mempool: micro-optimize put function

2022-12-19 Thread Morten Brørup
PING mempool maintainers.

If you don't provide an ACK or review for patches, how should Thomas know that
they are ready for merging?

-Morten

> From: Morten Brørup [mailto:m...@smartsharesystems.com]
> Sent: Wednesday, 16 November 2022 18.39
> 
> > From: Honnappa Nagarahalli [mailto:honnappa.nagaraha...@arm.com]
> > Sent: Wednesday, 16 November 2022 17.27
> >
> > 
> >
> > > > >
> > > > > Micro-optimization:
> > > > > Reduced the most likely code path in the generic put function
> by
> > > > moving an
> > > > > unlikely check out of the most likely code path and further
> down.
> > > > >
> > > > > Also updated the comments in the function.
> > > > >
> > > > > v2 (feedback from Andrew Rybchenko):
> > > > > * Modified comparison to prevent overflow if n is really huge
> and
> > > > > len
> > > > is
> > > > >   non-zero.
> > > > > * Added assertion about the invariant preventing overflow in
> the
> > > > >   comparison.
> > > > > * Crossing the threshold is not extremely unlikely, so removed
> > > > likely()
> > > > >   from that comparison.
> > > > >   The compiler will generate code with optimal static branch
> > > > prediction
> > > > >   here anyway.
> > > > >
> > > > > Signed-off-by: Morten Brørup 
> > > > > ---
> > > > >  lib/mempool/rte_mempool.h | 36 ---
> --
> > ---
> > > > >  1 file changed, 20 insertions(+), 16 deletions(-)
> > > > >
> > > > > diff --git a/lib/mempool/rte_mempool.h
> > b/lib/mempool/rte_mempool.h
> > > > > index 9f530db24b..dd1a3177d6 100644
> > > > > --- a/lib/mempool/rte_mempool.h
> > > > > +++ b/lib/mempool/rte_mempool.h
> > > > > @@ -1364,32 +1364,36 @@ rte_mempool_do_generic_put(struct
> > > > > rte_mempool *mp, void * const *obj_table,  {
> > > > >   void **cache_objs;
> > > > >
> > > > > - /* No cache provided */
> > > > > + /* No cache provided? */
> > > > >   if (unlikely(cache == NULL))
> > > > >   goto driver_enqueue;
> > > > >
> > > > > - /* increment stat now, adding in mempool always success */
> > > > > + /* Increment stats now, adding in mempool always succeeds.
> > */
> > > > >   RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1);
> > > > >   RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_objs, n);
> > > > >
> > > > > - /* The request itself is too big for the cache */
> > > > > - if (unlikely(n > cache->flushthresh))
> > > > > - goto driver_enqueue_stats_incremented;
> > > > > -
> > > > > - /*
> > > > > -  * The cache follows the following algorithm:
> > > > > -  *   1. If the objects cannot be added to the cache without
> > > > crossing
> > > > > -  *  the flush threshold, flush the cache to the
> > backend.
> > > > > -  *   2. Add the objects to the cache.
> > > > > -  */
> > > > > + /* Assert the invariant preventing overflow in the
> > comparison
> > > > below.
> > > > > */
> > > > > + RTE_ASSERT(cache->len <= cache->flushthresh);
> > > > >
> > > > > - if (cache->len + n <= cache->flushthresh) {
> > > > > + if (n <= cache->flushthresh - cache->len) {
> > > > > + /*
> > > > > +  * The objects can be added to the cache without
> > crossing
> > > > the
> > > > > +  * flush threshold.
> > > > > +  */
> > > > >   cache_objs = &cache->objs[cache->len];
> > > > >   cache->len += n;
> > > > > - } else {
> > > > > + } else if (likely(n <= cache->flushthresh)) {
> > > > IMO, this is a misconfiguration on the application part. In the
> > PMDs I
> > > > have looked at, max value of 'n' is controlled by compile time
> > > > constants. Application could do a compile time check on the cache
> > > > threshold or we could have another RTE_ASSERT on this.
> > >
> > > There could be applications using a mempool for something else than
> > mbufs.
> > Agree
> >
> > >
> > > In that case, the application should be allowed to get/put many
> > objects in
> > > one transaction.
> > Still, this is a misconfiguration on the application. On one hand the
> > threshold is configured for 'x' but they are sending a request which
> is
> > more than 'x'. It should be possible to change the threshold
> > configuration or reduce the request size.
> >
> > If the application does not fix the misconfiguration, it is possible
> > that it will always hit this case and does not get the benefit of
> using
> > the per-core cache.
> 
> Correct. I suppose this is the intended behavior of this API.
> 
> The zero-copy API proposed in another patch [1] has stricter
> requirements to the bulk size.
> 
> [1]: http://inbox.dpdk.org/dev/20221115161822.70886-1-
> m...@smartsharesystems.com/T/#u
> 
> >
> > With this check, we are introducing an additional memcpy as well. I
> am
> > not sure if reusing the latest buffers is better than having an
> memcpy.
> 
> There is no additional memcpy. The large bulk transfer is stored
> directly in the backend pool, bypassing the mempool cache.
> 
> Please note that this check is no

RE: [PATCH v19.11 LTS] kni: fix build for Suse

2022-12-19 Thread Gao, DaxueX
> From: Ferruh Yigit 
> Sent: 2022年12月16日 20:11
> To: Thomas Monjalon 
> Cc: dev@dpdk.org; David Marchand ;
> sta...@dpdk.org; christian.ehrha...@canonical.com; Gao, DaxueX
> 
> Subject: [PATCH v19.11 LTS] kni: fix build for Suse
> 
> Wrong macro is used in the patch that detects if '.ndo_tx_timeout'
> has 'txqueue' parameter or not, which is causing build error.
> 
> Fixed by using correct macro.
> 
> Bugzilla ID: 1141
> Fixes: d43fa3e198c0 ("kni: fix build for SLES15-SP3 (Make)")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Ferruh Yigit 

Tested-by: Daxue Gao 


[PATCH v5 0/5] support dmadev/ethdev stats reset

2022-12-19 Thread Chengwen Feng
This patchset adds dmadev/ethdev stats reset telemetry commands, and also
supports hiding zero values for ethdev xstats.

Chengwen Feng (5):
  dmadev: support stats reset telemetry command
  telemetry: fix repeated display when callback don't set dict
  ethdev: support xstats reset telemetry command
  ethdev: telemetry xstats support hide zero
  ethdev: add newline to telemetry log string

 lib/dmadev/rte_dmadev.c   | 40 +++
 lib/ethdev/rte_ethdev.c   | 57 ---
 lib/telemetry/telemetry.c |  4 ++-
 3 files changed, 91 insertions(+), 10 deletions(-)

-- 
2.17.1



[PATCH v5 1/5] dmadev: support stats reset telemetry command

2022-12-19 Thread Chengwen Feng
The stats reset is useful for debugging, so add it to the dmadev
telemetry command lists.
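
Example interaction through usertools/dpdk-telemetry.py (device 0,
vchannel 0; the response shape is indicative only):

    --> /dmadev/stats_reset,0,0
    {"/dmadev/stats_reset": {}}

As with /dmadev/stats, the vchannel id may be omitted when the device
has a single vchannel.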

Signed-off-by: Chengwen Feng 
---
 lib/dmadev/rte_dmadev.c | 40 
 1 file changed, 40 insertions(+)

diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
index 4da653eec7..93e15d4972 100644
--- a/lib/dmadev/rte_dmadev.c
+++ b/lib/dmadev/rte_dmadev.c
@@ -994,6 +994,44 @@ dmadev_handle_dev_stats(const char *cmd __rte_unused,
return 0;
 }
 
+static int
+dmadev_handle_dev_stats_reset(const char *cmd __rte_unused,
+   const char *params,
+   struct rte_tel_data *d __rte_unused)
+{
+   struct rte_dma_info dma_info;
+   int dev_id, ret, vchan_id;
+   const char *vchan_param;
+   char *end_param;
+
+   if (params == NULL || strlen(params) == 0 || !isdigit(*params))
+   return -EINVAL;
+
+   dev_id = strtoul(params, &end_param, 0);
+
+   /* Function info_get validates dev_id so we don't need to. */
+   ret = rte_dma_info_get(dev_id, &dma_info);
+   if (ret < 0)
+   return -EINVAL;
+
+   /* If the device has one vchan the user does not need to supply the
+* vchan id and only the device id is needed, no extra parameters.
+*/
+   if (dma_info.nb_vchans == 1 && *end_param == '\0')
+   vchan_id = 0;
+   else {
+   vchan_param = strtok(end_param, ",");
+   if (!vchan_param || strlen(vchan_param) == 0 || !isdigit(*vchan_param))
+   return -EINVAL;
+
+   vchan_id = strtoul(vchan_param, &end_param, 0);
+   }
+   if (*end_param != '\0')
+   RTE_DMA_LOG(WARNING, "Extra parameters passed to dmadev telemetry command, ignoring");
+
+   return rte_dma_stats_reset(dev_id, vchan_id);
+}
+
 #ifndef RTE_EXEC_ENV_WINDOWS
 static int
 dmadev_handle_dev_dump(const char *cmd __rte_unused,
@@ -1041,6 +1079,8 @@ RTE_INIT(dmadev_init_telemetry)
"Returns information for a dmadev. Parameters: int dev_id");
rte_telemetry_register_cmd("/dmadev/stats", dmadev_handle_dev_stats,
"Returns the stats for a dmadev vchannel. Parameters: int dev_id, vchan_id (Optional if only one vchannel)");
+   rte_telemetry_register_cmd("/dmadev/stats_reset", dmadev_handle_dev_stats_reset,
+   "Reset the stats for a dmadev vchannel. Parameters: int dev_id, vchan_id (Optional if only one vchannel)");
 #ifndef RTE_EXEC_ENV_WINDOWS
rte_telemetry_register_cmd("/dmadev/dump", dmadev_handle_dev_dump,
"Returns dump information for a dmadev. Parameters: int dev_id");
-- 
2.17.1



[PATCH v5 2/5] telemetry: fix repeated display when callback don't set dict

2022-12-19 Thread Chengwen Feng
When a telemetry callback doesn't set the dict and returns a
non-negative number, telemetry will repeatedly display the last result.
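
A minimal (hypothetical) callback that triggers the problem: it
reports success but never writes to the rte_tel_data buffer, so
whatever the previous command left on the stack gets serialized again:

    static int
    broken_cb(const char *cmd __rte_unused,
              const char *params __rte_unused,
              struct rte_tel_data *d __rte_unused)
    {
        /* Success is returned, but 'd' was never initialized */
        return 0;
    }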

Fixes: 6dd571fd07c3 ("telemetry: introduce new functionality")
Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/telemetry/telemetry.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
index 8fbb4f3060..a6c84e3499 100644
--- a/lib/telemetry/telemetry.c
+++ b/lib/telemetry/telemetry.c
@@ -334,8 +334,10 @@ static void
 perform_command(telemetry_cb fn, const char *cmd, const char *param, int s)
 {
struct rte_tel_data data;
+   int ret;
 
-   int ret = fn(cmd, param, &data);
+   rte_tel_data_start_dict(&data);
+   ret = fn(cmd, param, &data);
if (ret < 0) {
char out_buf[MAX_CMD_LEN + 10];
int used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":null}",
-- 
2.17.1



[PATCH v5 3/5] ethdev: support xstats reset telemetry command

2022-12-19 Thread Chengwen Feng
The xstats reset is useful for debugging, so add it to the ethdev
telemetry command lists.

Signed-off-by: Chengwen Feng 
---
 lib/ethdev/rte_ethdev.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 5d5e18db1e..0774de81b0 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5915,6 +5915,27 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused,
return 0;
 }
 
+static int
+eth_dev_handle_port_xstats_reset(const char *cmd __rte_unused,
+   const char *params,
+   struct rte_tel_data *d __rte_unused)
+{
+   char *end_param;
+   int port_id;
+
+   if (params == NULL || strlen(params) == 0 || !isdigit(*params))
+   return -1;
+
+   port_id = strtoul(params, &end_param, 0);
+   if (*end_param != '\0')
+   RTE_ETHDEV_LOG(NOTICE,
+   "Extra parameters passed to ethdev telemetry command, ignoring\n");
+   if (!rte_eth_dev_is_valid_port(port_id))
+   return -1;
+
+   return rte_eth_xstats_reset(port_id);
+}
+
 #ifndef RTE_EXEC_ENV_WINDOWS
 static int
 eth_dev_handle_port_dump_priv(const char *cmd __rte_unused,
@@ -6328,6 +6349,8 @@ RTE_INIT(ethdev_init_telemetry)
"Returns the common stats for a port. Parameters: int port_id");
rte_telemetry_register_cmd("/ethdev/xstats", eth_dev_handle_port_xstats,
"Returns the extended stats for a port. Parameters: int port_id");
+   rte_telemetry_register_cmd("/ethdev/xstats_reset", eth_dev_handle_port_xstats_reset,
+   "Reset the extended stats for a port. Parameters: int port_id");
 #ifndef RTE_EXEC_ENV_WINDOWS
rte_telemetry_register_cmd("/ethdev/dump_priv", eth_dev_handle_port_dump_priv,
"Returns dump private information for a port. Parameters: int port_id");
-- 
2.17.1



[PATCH v5 5/5] ethdev: add newline to telemetry log string

2022-12-19 Thread Chengwen Feng
The telemetry-related code may invoke RTE_ETHDEV_LOG to display
information. RTE_ETHDEV_LOG does not append a newline automatically,
so the newline character must be explicitly added to the log string.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/ethdev/rte_ethdev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 2075086eeb..30f79b7d4c 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5968,7 +5968,7 @@ eth_dev_handle_port_dump_priv(const char *cmd __rte_unused,
port_id = strtoul(params, &end_param, 0);
if (*end_param != '\0')
RTE_ETHDEV_LOG(NOTICE,
-   "Extra parameters passed to ethdev telemetry command, ignoring");
+   "Extra parameters passed to ethdev telemetry command, ignoring\n");
if (!rte_eth_dev_is_valid_port(port_id))
return -EINVAL;
 
@@ -6010,7 +6010,7 @@ eth_dev_handle_port_link_status(const char *cmd __rte_unused,
port_id = strtoul(params, &end_param, 0);
if (*end_param != '\0')
RTE_ETHDEV_LOG(NOTICE,
-   "Extra parameters passed to ethdev telemetry command, ignoring");
+   "Extra parameters passed to ethdev telemetry command, ignoring\n");
if (!rte_eth_dev_is_valid_port(port_id))
return -1;
 
@@ -6048,7 +6048,7 @@ eth_dev_handle_port_info(const char *cmd __rte_unused,
port_id = strtoul(params, &end_param, 0);
if (*end_param != '\0')
RTE_ETHDEV_LOG(NOTICE,
-   "Extra parameters passed to ethdev telemetry command, ignoring");
+   "Extra parameters passed to ethdev telemetry command, ignoring\n");
 
if (!rte_eth_dev_is_valid_port(port_id))
return -EINVAL;
-- 
2.17.1



[PATCH v5 4/5] ethdev: telemetry xstats support hide zero

2022-12-19 Thread Chengwen Feng
The number of xstats may be large. After the hide-zero option is added,
only non-zero values are displayed.
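
Example interaction through usertools/dpdk-telemetry.py (port 0,
hide-zero enabled; counter names and the response shape are
indicative only):

    --> /ethdev/xstats,0,1
    {"/ethdev/xstats": {"rx_good_packets": 100, "tx_good_packets": 100}}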

Signed-off-by: Chengwen Feng 
---
 lib/ethdev/rte_ethdev.c | 28 ++--
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 0774de81b0..2075086eeb 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5870,20 +5870,33 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused,
 {
struct rte_eth_xstat *eth_xstats;
struct rte_eth_xstat_name *xstat_names;
+   char *end_param, *hide_param;
int port_id, num_xstats;
+   int hide_zero = 0;
int i, ret;
-   char *end_param;
 
if (params == NULL || strlen(params) == 0 || !isdigit(*params))
return -1;
 
port_id = strtoul(params, &end_param, 0);
-   if (*end_param != '\0')
-   RTE_ETHDEV_LOG(NOTICE,
-   "Extra parameters passed to ethdev telemetry command, 
ignoring");
if (!rte_eth_dev_is_valid_port(port_id))
return -1;
 
+   if (*end_param != '\0') {
+   hide_param = strtok(end_param, ",");
+   if (!hide_param || strlen(hide_param) == 0 || !isdigit(*hide_param))
+   return -EINVAL;
+   hide_zero = strtoul(hide_param, &end_param, 0);
+   if (*end_param != '\0')
+   RTE_ETHDEV_LOG(NOTICE,
+   "Extra parameters passed to ethdev telemetry command, ignoring\n");
+   if (hide_zero != 0 && hide_zero != 1) {
+   hide_zero = !!hide_zero;
+   RTE_ETHDEV_LOG(NOTICE,
+   "Hide zero parameter is non-boolean, cast to boolean\n");
+   }
+   }
+
num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
if (num_xstats < 0)
return -1;
@@ -5908,9 +5921,12 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused,
}
 
rte_tel_data_start_dict(d);
-   for (i = 0; i < num_xstats; i++)
+   for (i = 0; i < num_xstats; i++) {
+   if (hide_zero && eth_xstats[i].value == 0)
+   continue;
rte_tel_data_add_dict_u64(d, xstat_names[i].name,
eth_xstats[i].value);
+   }
free(eth_xstats);
return 0;
 }
@@ -6348,7 +6364,7 @@ RTE_INIT(ethdev_init_telemetry)
rte_telemetry_register_cmd("/ethdev/stats", eth_dev_handle_port_stats,
"Returns the common stats for a port. Parameters: int port_id");
rte_telemetry_register_cmd("/ethdev/xstats", eth_dev_handle_port_xstats,
-   "Returns the extended stats for a port. Parameters: int port_id");
+   "Returns the extended stats for a port. Parameters: int port_id, hide_zero (Optional, specify whether to hide zero values)");
rte_telemetry_register_cmd("/ethdev/xstats_reset", eth_dev_handle_port_xstats_reset,
"Reset the extended stats for a port. Parameters: int port_id");
 #ifndef RTE_EXEC_ENV_WINDOWS
-- 
2.17.1



Re: [PATCH V8 6/8] telemetry: support adding integer value as hexadecimal

2022-12-19 Thread Bruce Richardson
On Mon, Dec 19, 2022 at 03:06:46PM +0800, Huisong Li wrote:
> Sometimes displaying an unsigned integer value in hexadecimal encoded style
> is more expected for human consumption, such as offload capability and
> device flag. This patch introduces two APIs to add an unsigned integer value
> as a hexadecimal encoded string to an array or dictionary. And the user can
> choose whether the stored value is padded to the specified width.
> 
> Signed-off-by: Huisong Li 
> Acked-by: Morten Brørup 
> Acked-by: Chengwen Feng 

Acked-by: Bruce Richardson 


Re: NIC support for IOVA as VA

2022-12-19 Thread fengchengwen
Hi Morten,

  +1 for features.

  Also, this is a compilation option. Can you add compilation and related
test cases to the DPDK CI? In this way, we can check whether a patch
affects the code (IOVA_AS_PA).


On 2022/12/7 16:06, Morten Brørup wrote:
> Should support for IOVA as VA be listed under NIC features [1]?
> 
> [1]: http://doc.dpdk.org/guides/nics/
> 
> 
> Med venlig hilsen / Kind regards,
> -Morten Brørup
> 
> 
> 
> .
> 


Re: [PATCH v5 2/5] telemetry: fix repeated display when callback don't set dict

2022-12-19 Thread Bruce Richardson
On Mon, Dec 19, 2022 at 09:07:20AM +, Chengwen Feng wrote:
> When telemetry callback didn't set dict and return a non-negative
> number, the telemetry will repeat to display the last result.
> 
> Fixes: 6dd571fd07c3 ("telemetry: introduce new functionality")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Chengwen Feng 
> ---

Hi Chengwen,

I'm a little curious about this bug. Can you describe some steps to
reproduce it as I'm curious as to exactly what is happening. The fix seems
a little strange to me so I'd like to investigate a little more to see if
other approaches might work.

Thanks,
/Bruce


Re: [RFC PATCH 2/7] telemetry: add uint type as alias for u64

2022-12-19 Thread Thomas Monjalon
15/12/2022 14:58, Bruce Richardson:
> On Thu, Dec 15, 2022 at 02:36:51PM +0100, Thomas Monjalon wrote:
> > 15/12/2022 10:44, Bruce Richardson:
> > > On Wed, Dec 14, 2022 at 09:38:45AM -0800, Tyler Retzlaff wrote:
> > > > On Tue, Dec 13, 2022 at 06:27:25PM +, Bruce Richardson wrote:
> > > > > For future standardization on the "uint" name for unsigned values 
> > > > > rather
> > > > > than the existing "u64" one, we can for now:
> > > > > * rename all internal values to use uint rather than u64
> > > > > * add new function names to alias the existing u64 ones
> > > > > 
> > > > > Suggested-by: Morten Brørup 
> > > > > Signed-off-by: Bruce Richardson 
> > > > 
> > > > when adding __rte_experimental api i have been asked to add the
> > > > following boilerplate documentation block. i'm not pushing it, just
> > > > recalling it is what i get asked for, so in case it's something we do?
> > > > see lib/eal/include/rte_thread.h as an example
> > > > 
> > > > 
> > > > ```
> > > >  * @warning
> > > >  * @b EXPERIMENTAL: this API may change without prior notice.
> > > > ```
> > > >
> > > 
> > > Ok, thanks for the notice.
> > > 
> > > Actually, related to this, since we are adding these functions as aliases
> > > for existing stable functions, I would like to see these being added not 
> > > as
> > > experimental. The reason for that, is that while they are experimental, we
> > > cannot feasibly mark the old function names as deprecated. :-(
> > > 
> > > Adding Thomas and David on CC for their thoughts.
> > 
> > Is it related to telemetry?
> > 
> > In general, yes we cannot deprecate something if there is no stable 
> > replacement.
> > The recommended step is to introduce a new experimental API
> > and deprecate the old one when the new API is stable.
> > 
> Yes, understood.
> What we are really trying to do here is to rename an API, by process of
> adding the new API and then marking the old one as deprecated. The small
> issue is that adding the new one it is by default experimental, meaning we
> need to wait for deprecating old one. Ideally, as soon as the new API is
> added, we would like to point people to use that, but can't really do so
> while it is experimental.
> 
> ---
> 
> By way of explicit detail, Morten pointed out the inconsistency in the
> telemetry APIs and types:
> 
> * we have add_*_int, which takes a 32-bit signed value
> * we have add_*_u64 which takes 64-bit unsigned (as name suggests).
> 
> The ideal end-state is to always use 64-bit values (since there is no space
> saving from 32-bit as a union is used), and just name everything as "int"
> or "uint" for signed/unsigned. The two big steps here are:
> 
> * expanding type of the "int" functions to take 64-bit parameters - this is
>   ABI change but not API one, since existing code will happily promote
>   values on compile. Therefore, we just use ABI versioning to have a 32-bit
>   version for older linked binaries.
> * the rename of the rte_tel_data_add_array_u64 and
>   rte_tel_data_add_dict_u64 to *_uint variants. Though keeping
>   compatibility is easier, as we can just add new functions, the overall
>   process is slower since the new functions technically should be added as
>   experimental - hence the email. For the case of function renaming, do we
>   still need to have the "renamed" versions as experimental initially?

If a function is simply renamed, I think there is no need for the experimental 
step.
Would we keep an alias with the old name for some time?




Re: 21.11.3 patches review and test

2022-12-19 Thread Kevin Traynor

On 15/12/2022 17:51, Ali Alnubani wrote:

-Original Message-
From: Kevin Traynor 
Sent: Tuesday, December 6, 2022 1:30 PM
To: sta...@dpdk.org
Cc: dev@dpdk.org; Abhishek Marathe ;
Ali Alnubani ; benjamin.wal...@intel.com; David
Christensen ; Hemant Agrawal
; Ian Stokes ; Jerin
Jacob ; John McNamara ;
Ju-Hyoung Lee ; Kevin Traynor
; Luca Boccassi ; Pei Zhang
; qian.q...@intel.com; Raslan Darawsheh
; NBU-Contact-Thomas Monjalon (EXTERNAL)
; yangh...@redhat.com; yuan.p...@intel.com;
zhaoyan.c...@intel.com
Subject: 21.11.3 patches review and test

Hi all,

Here is a list of patches targeted for stable release 21.11.3.

The planned date for the final release is 19th December.

Please help with testing and validation of your use cases and report
any issues/results with reply-all to this mail. For the final release
the fixes and reported validations will be added to the release notes.

A release candidate tarball can be found at:

 https://dpdk.org/browse/dpdk-stable/tag/?id=v21.11.3-rc1

These patches are located at branch 21.11 of dpdk-stable repo:
 https://dpdk.org/browse/dpdk-stable/

Thanks.

Kevin

---


Hello,

We ran the following functional tests with Nvidia hardware on 21.11.3-rc1:
- Basic functionality:
   Send and receive multiple types of traffic.
- testpmd xstats counter test.
- testpmd timestamp test.
- Changing/checking link status through testpmd.
- RTE flow tests:
   Items:
   - ecpri
   - eth
   - flex
   - geneve
   - geneve_opt
   - gre
   - gre_key
   - gre_option
   - gtp
   - gtp_psc
   - icmp
   - icmp6
   - integrity
   - ipv4
   - ipv6
   - ipv6_frag_ext
   - mark
   - meta
   - mpls
   - nvgre
   - tag
   - tcp
   - udp
   - vlan
   - vxlan
   - vxlan_gpe

   Actions:
   - age
   - count
   - dec_tcp_ack
   - dec_tcp_seq
   - dec_ttl
   - drop
   - flag
   - inc_tcp_ack
   - inc_tcp_seq
   - jump
   - mark
   - meter
   - modify_field
   - nvgre_decap
   - nvgre_encap
   - of_pop_vlan
   - of_push_vlan
   - of_set_vlan_pcp
   - of_set_vlan_vid
   - queue
   - raw_decap
   - raw_encap
   - rss
   - sample
   - set_ipv4_dscp
   - set_ipv4_dst
   - set_ipv4_src
   - set_ipv6_dscp
   - set_ipv6_dst
   - set_ipv6_src
   - set_mac_dst
   - set_mac_src
   - set_meta
   - set_tag
   - set_tp_dst
   - set_tp_src
   - set_ttl
   - vxlan_decap
   - vxlan_encap
- Some RSS tests.
- VLAN filtering, stripping and insertion tests.
- Checksum and TSO tests.
- ptype tests.
- link_status_interrupt example application tests.
- l3fwd-power example application tests.
- Multi-process example applications tests.
- Hardware LRO tests.
- Regex application tests.
- Buffer Split tests.
- Tx scheduling tests.

Functional tests ran on:
- NIC: ConnectX-4 Lx / OS: Ubuntu 20.04 LTS / Driver: 
MLNX_OFED_LINUX-5.8-1.0.1.1 / Firmware: 14.32.1010
- NIC: ConnectX-5 / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.8-1.0.1.1 
/ Firmware: 16.35.1012
- NIC: ConnectX-6 Dx / OS: Ubuntu 20.04 LTS / Driver: 
MLNX_OFED_LINUX-5.7-1.0.2.0 / Firmware: 22.35.1012
- DPU: BlueField-2 / DOCA SW version: 1.5.0 / Firmware: 24.35.1012

We don't see any new issues in functional testing caused by changes in this 
release.

Additionally, we ran compilation tests with multiple configurations on the 
following OS/driver combinations:
- Ubuntu 22.04.1 with MLNX_OFED_LINUX-5.8-1.1.2.1.
- Ubuntu 20.04.5 with MLNX_OFED_LINUX-5.8-1.1.2.1.
- Ubuntu 20.04.5 with rdma-core master (76cfaa1).
- Ubuntu 20.04.5 with rdma-core v28.0.
- Ubuntu 18.04.6 with rdma-core v17.1.
- Ubuntu 18.04.6 with rdma-core master (76cfaa1) (i386).
- Ubuntu 16.04.7 with rdma-core v22.7.
- Fedora 37 with rdma-core v41.0.
- Fedora 38 (Rawhide) with rdma-core v41.0.
- CentOS 7 7.9.2009 with rdma-core master (76cfaa1).
- CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.8-1.0.1.1.
- CentOS 8 8.4.2105 with rdma-core master (76cfaa1).
- OpenSUSE Leap 15.4 with rdma-core v38.1.
- Windows Server 2019 with Clang 11.0.0.

The builds are passing on most OS except for Fedora 37 and 38, where I have the 
following issues:
- Bug 1149 - [21.11] lib/ring build failure with gcc 12 and debug enabled 
(https://bugs.dpdk.org/show_bug.cgi?id=1149)
- Bug 1150 - [21.11] failure to build API's html docs on Fedora 37 
(https://bugs.dpdk.org/show_bug.cgi?id=1150)



Thanks for testing Ali. I will take a look at these.
Kevin.


Thanks,
Ali




[dpdk-dev] [PATCH] net/cnxk: support flow info API

2022-12-19 Thread psatheesh
From: Satheesh Paul 

Implement rte_flow_info_get API to get the maximum
number of counters and meters.
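
The limits can then be queried through the generic API, roughly:

    struct rte_flow_port_info port_info;
    struct rte_flow_queue_info queue_info;
    struct rte_flow_error error;

    if (rte_flow_info_get(port_id, &port_info, &queue_info, &error) == 0)
        printf("max counters: %u, max meters: %u\n",
               port_info.max_nb_counters, port_info.max_nb_meters);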

Signed-off-by: Satheesh Paul 
Reviewed-by: Kiran Kumar K 
---
Depends-on: patch-26075 ("common/cnxk: fix dual VLAN parsing issue")

 drivers/net/cnxk/cn10k_ethdev.c|  1 +
 drivers/net/cnxk/cn10k_flow.c  | 16 
 drivers/net/cnxk/cn10k_flow.h  |  8 ++--
 drivers/net/cnxk/cn9k_ethdev.c |  1 +
 drivers/net/cnxk/cn9k_flow.c   | 15 +++
 drivers/net/cnxk/cn9k_flow.h   | 10 ++
 drivers/net/cnxk/cnxk_ethdev.h |  1 +
 drivers/net/cnxk/cnxk_ethdev_mtr.c |  1 -
 8 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index 4658713591..ef1637ec08 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -754,6 +754,7 @@ npc_flow_ops_override(void)
/* Update platform specific ops */
cnxk_flow_ops.create = cn10k_flow_create;
cnxk_flow_ops.destroy = cn10k_flow_destroy;
+   cnxk_flow_ops.info_get = cn10k_flow_info_get;
 }
 
 static int
diff --git a/drivers/net/cnxk/cn10k_flow.c b/drivers/net/cnxk/cn10k_flow.c
index 7df879a2bb..2ce9e1de74 100644
--- a/drivers/net/cnxk/cn10k_flow.c
+++ b/drivers/net/cnxk/cn10k_flow.c
@@ -207,6 +207,22 @@ cn10k_flow_create(struct rte_eth_dev *eth_dev, const struct rte_flow_attr *attr,
return (struct rte_flow *)flow;
 }
 
+int
+cn10k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info 
*port_info,
+   struct rte_flow_queue_info *queue_info, struct 
rte_flow_error *err)
+{
+   RTE_SET_USED(dev);
+   RTE_SET_USED(err);
+
+   memset(port_info, 0, sizeof(*port_info));
+   memset(queue_info, 0, sizeof(*queue_info));
+
+   port_info->max_nb_counters = CN10K_NPC_COUNTERS_MAX;
+   port_info->max_nb_meters = CNXK_NIX_MTR_COUNT_MAX;
+
+   return 0;
+}
+
 int
 cn10k_flow_destroy(struct rte_eth_dev *eth_dev, struct rte_flow *rte_flow,
   struct rte_flow_error *error)
diff --git a/drivers/net/cnxk/cn10k_flow.h b/drivers/net/cnxk/cn10k_flow.h
index f64fcf2a5e..316b74e6a6 100644
--- a/drivers/net/cnxk/cn10k_flow.h
+++ b/drivers/net/cnxk/cn10k_flow.h
@@ -6,12 +6,16 @@
 
 #include 
 
-struct rte_flow *cn10k_flow_create(struct rte_eth_dev *dev,
-  const struct rte_flow_attr *attr,
+struct rte_flow *cn10k_flow_create(struct rte_eth_dev *dev, const struct 
rte_flow_attr *attr,
   const struct rte_flow_item pattern[],
   const struct rte_flow_action actions[],
   struct rte_flow_error *error);
 int cn10k_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
   struct rte_flow_error *error);
 
+int cn10k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info 
*port_info,
+   struct rte_flow_queue_info *queue_info, struct 
rte_flow_error *err);
+
+#define CN10K_NPC_COUNTERS_MAX 512
+
 #endif /* __CN10K_RTE_FLOW_H__ */
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 3b702d9696..f08e986bd1 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -674,6 +674,7 @@ npc_flow_ops_override(void)
/* Update platform specific ops */
cnxk_flow_ops.create = cn9k_flow_create;
cnxk_flow_ops.destroy = cn9k_flow_destroy;
+   cnxk_flow_ops.info_get = cn9k_flow_info_get;
 }
 
 static int
diff --git a/drivers/net/cnxk/cn9k_flow.c b/drivers/net/cnxk/cn9k_flow.c
index 15ccdf8919..a418af185d 100644
--- a/drivers/net/cnxk/cn9k_flow.c
+++ b/drivers/net/cnxk/cn9k_flow.c
@@ -54,3 +54,18 @@ cn9k_flow_destroy(struct rte_eth_dev *eth_dev, struct 
rte_flow *rte_flow,
 
return cnxk_flow_destroy(eth_dev, flow, error);
 }
+
+int
+cn9k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info 
*port_info,
+  struct rte_flow_queue_info *queue_info, struct 
rte_flow_error *err)
+{
+   RTE_SET_USED(dev);
+   RTE_SET_USED(err);
+
+   memset(port_info, 0, sizeof(*port_info));
+   memset(queue_info, 0, sizeof(*queue_info));
+
+   port_info->max_nb_counters = CN9K_NPC_COUNTERS_MAX;
+
+   return 0;
+}
diff --git a/drivers/net/cnxk/cn9k_flow.h b/drivers/net/cnxk/cn9k_flow.h
index 43d59e1eb2..26f93ea204 100644
--- a/drivers/net/cnxk/cn9k_flow.h
+++ b/drivers/net/cnxk/cn9k_flow.h
@@ -6,12 +6,14 @@
 
 #include 
 
-struct rte_flow *cn9k_flow_create(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
+struct rte_flow *cn9k_flow_create(struct rte_eth_dev *dev, const struct 
rte_flow_attr *attr,
  const struct rte_flow_item pattern[],
  const struct rte_flow_action actions[],
  struct rte_flow_error *error);
-int cn9k_flow_destroy(
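
For reference, a minimal sketch of how an application could query these
limits through the generic rte_flow API (assumes an initialized port;
error handling kept brief):

#include <stdio.h>
#include <rte_flow.h>

/* Query the flow engine limits a PMD advertises for a port. On cnxk,
 * max_nb_counters and max_nb_meters are filled in as in the patch above. */
static int
print_flow_limits(uint16_t port_id)
{
        struct rte_flow_port_info port_info;
        struct rte_flow_queue_info queue_info;
        struct rte_flow_error err;

        if (rte_flow_info_get(port_id, &port_info, &queue_info, &err) < 0) {
                fprintf(stderr, "port %u: %s\n", port_id,
                        err.message ? err.message : "rte_flow_info_get failed");
                return -1;
        }
        printf("port %u: max counters %u, max meters %u\n", port_id,
               port_info.max_nb_counters, port_info.max_nb_meters);
        return 0;
}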

Re: [RFC PATCH 2/7] telemetry: add uint type as alias for u64

2022-12-19 Thread Bruce Richardson
On Mon, Dec 19, 2022 at 11:37:19AM +0100, Thomas Monjalon wrote:
> 15/12/2022 14:58, Bruce Richardson:
> > On Thu, Dec 15, 2022 at 02:36:51PM +0100, Thomas Monjalon wrote:
> > > 15/12/2022 10:44, Bruce Richardson:
> > > > On Wed, Dec 14, 2022 at 09:38:45AM -0800, Tyler Retzlaff wrote:
> > > > > On Tue, Dec 13, 2022 at 06:27:25PM +, Bruce Richardson wrote:
> > > > > > For future standardization on the "uint" name for unsigned values 
> > > > > > rather
> > > > > > than the existing "u64" one, we can for now:
> > > > > > * rename all internal values to use uint rather than u64
> > > > > > * add new function names to alias the existing u64 ones
> > > > > > 
> > > > > > Suggested-by: Morten Brørup 
> > > > > > Signed-off-by: Bruce Richardson 
> > > > > 
> > > > > when adding an __rte_experimental API, I have been asked to add the
> > > > > following boilerplate documentation block. I'm not pushing it, just
> > > > > recalling that it is what I get asked for, in case it's something we do?
> > > > > See lib/eal/include/rte_thread.h as an example.
> > > > > 
> > > > > 
> > > > > ```
> > > > >  * @warning
> > > > >  * @b EXPERIMENTAL: this API may change without prior notice.
> > > > > ```
> > > > >
> > > > 
> > > > Ok, thanks for the notice.
> > > > 
> > > > Actually, related to this, since we are adding these functions as
> > > > aliases for existing stable functions, I would like to see them added
> > > > not as experimental. The reason for that is that while they are
> > > > experimental, we cannot feasibly mark the old function names as
> > > > deprecated. :-(
> > > > 
> > > > Adding Thomas and David on CC for their thoughts.
> > > 
> > > Is it related to telemetry?
> > > 
> > > In general, yes we cannot deprecate something if there is no stable 
> > > replacement.
> > > The recommended step is to introduce a new experimental API
> > > and deprecate the old one when the new API is stable.
> > > 
> > Yes, understood.
> > What we are really trying to do here is to rename an API, by the process of
> > adding the new API and then marking the old one as deprecated. The small
> > issue is that the newly added API is by default experimental, meaning we
> > need to wait before deprecating the old one. Ideally, as soon as the new API
> > is added, we would like to point people to use it, but can't really do so
> > while it is experimental.
> > 
> > ---
> > 
> > By way of explicit detail, Morten pointed out the inconsistency in the
> > telemetry APIs and types:
> > 
> > * we have add_*_int, which takes a 32-bit signed value
> > * we have add_*_u64 which takes 64-bit unsigned (as name suggests).
> > 
> > The ideal end-state is to always use 64-bit values (since there is no space
> > saving from 32-bit as a union is used), and just name everything as "int"
> > or "uint" for signed/unsigned. The two big steps here are:
> > 
> > * expanding the type of the "int" functions to take 64-bit parameters - this
> >   is an ABI change but not an API one, since existing code will happily
> >   promote values at compile time. Therefore, we just use ABI versioning to
> >   keep a 32-bit version for older linked binaries.
> > * the rename of the rte_tel_data_add_array_u64 and
> >   rte_tel_data_add_dict_u64 to *_uint variants. Though keeping
> >   compatibility is easier, as we can just add new functions, the overall
> >   process is slower since the new functions technically should be added as
> >   experimental - hence the email. For the case of function renaming, do we
> >   still need to have the "renamed" versions as experimental initially?
> 
> If a function is simply renamed, I think there is no need for the 
> experimental step.
> Would we keep an alias with the old name for some time?
> 
Yes, the old name should go through the deprecation process. No 
hurry with removal.

/Bruce
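
To make the rename concrete, a rough sketch of the alias approach under
discussion, assuming the new *_uint name simply wraps the existing stable
u64 function (whether such a wrapper must start out experimental is exactly
the open question in this thread):

#include <rte_telemetry.h>

/* Sketch only: proposed "uint" spelling aliasing the existing stable API.
 * rte_tel_data_add_dict_u64() already exists; the wrapper name follows
 * the naming convention proposed above. */
int
rte_tel_data_add_dict_uint(struct rte_tel_data *d, const char *name,
                uint64_t val)
{
        return rte_tel_data_add_dict_u64(d, name, val);
}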


Re: [RFC] eal: per-thread constructors/destructors

2022-12-19 Thread Mattias Rönnblom
On 2022-12-18 11:01, Morten Brørup wrote:
> This RFC introduces per-thread constructors/destructors:
> * Per-thread constructors are functions called as the first thing from newly 
> created threads. This can be used to initialize variables in thread-local 
> storage, variables depending on the (tid_t) thread id, and to call setup 
> functions that must be called from the thread itself.
> * Per-thread destructors are functions called (from the thread) as the last 
> thing when a thread ends.
> 
> At this time, I am seeking feedback on the concept and the proposed 
> limitations.
> 

This seems like a useful mechanism that would simplify things for 
libraries and PMDs that have dynamically allocated per-thread resources. 
It's unclear to me how many such modules there are.

> Processes have __attribute__(constructor/destructor) to set up functions to 
> be called before main(). Nothing similar exists for threads, so we have to 
> design it ourselves.
> 
> The proposed per-thread constructors/destructors should not apply to all 
> threads - only to threads created through the DPDK threads API. Agree?
>
> DPDK has the RTE_INIT()/RTE_FINI() macros for adding process
> constructors/destructors at build time, so I propose a similar API, i.e.
> adding RTE_THREAD_INIT() and RTE_THREAD_FINI() macros to build a list of
> per-thread constructors and destructors at build time.
> 
> Two functions that call the list of per-thread constructors respectively 
> destructors will be introduced.
> 
> The per-thread constructor function will be called from the newly created 
> threads within DPDK:
> 1. EAL/SERVICE threads: In eal_thread_loop() before the loop.
> 2. CTRL threads: In ctrl_thread_init() before start_routine().
> 3. Non-EAL threads: In rte_thread_register().
> 
> Are any thread types missing in the list above?
> 

Maybe all unregistered non-EAL threads should be excluded, including 
those created via rte_ctrl_thread_create().

In that case, you would have:
1. EAL threads (both the main and the worker lcores, and both lcores 
employed as service and non-service lcores).
2. Registered non-EAL threads (created using rte_ctrl_thread_create() or 
any other means, but all having called rte_thread_register()).

Another thing which is unclear to me is if libraries/PMDs tend to want 
to differentiate between EAL threads and registered non-EAL threads. EAL 
threads are generally required to be safe from preemption and also 
generally do fast path/packet processing kind of work, while what 
registered non-EAL threads tend to do (and thus what kind of API access 
they require) is more open.

Another thing to consider is how this mechanism should work in regards 
to scaling up/down a DPDK application. When an EAL thread is taken out 
of use, should the finalize function be called? It might complicate 
things, but on the other hand would allow libraries and PMDs to free 
resources no longer required.

> 
> The per-thread destructor function will also be registered by the per-thread 
> constructor, using the POSIX pthread_cleanup_push() function.
> 
> 
> Examples of where per-thread constructors are useful:
> PRNG state initialization [1]: 
> http://inbox.dpdk.org/dev/2a5121c7-369f-afde-0898-d45a5b368c3a@ericsson.com/
> PMU event setup [2]: 
> http://inbox.dpdk.org/dev/a059c403-8c6c-79c3-c3e9-a8c1815f5a14@ericsson.com/T/#m3d29fbc301059f007425ac148a4113e66d2ef983
> 
> 
> Please also refer to the discussion [3] about lcore init callbacks for 
> further thoughts. Especially the de-facto inability to establish 
> constructors/destructors at runtime.
> 
> [3]: 
> http://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35D875B6@smartserver.smartshare.dk/T/#m23b9a930a4050dc6b0305d3653754bd152c09ab7
> 
> 
> Med venlig hilsen / Kind regards,
> -Morten Brørup
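
To illustrate how the proposed build-time list could be assembled, here is
a purely hypothetical sketch (none of these names exist in DPDK yet):
registration piggybacks on the existing RTE_INIT() process constructor,
and each thread entry point listed in the RFC would walk the list before
its main loop.

#include <rte_common.h> /* for RTE_INIT() */

/* Hypothetical sketch of the proposed RTE_THREAD_INIT() mechanism. */
typedef void (*rte_thread_ctor_t)(void);

#define MAX_THREAD_CTORS 64
static rte_thread_ctor_t thread_ctors[MAX_THREAD_CTORS];
static unsigned int nb_thread_ctors;

/* A process constructor registers the per-thread hook at startup;
 * bounds checking omitted for brevity. */
#define RTE_THREAD_INIT(fn)                                \
        static void fn(void);                              \
        RTE_INIT(thread_ctor_register_##fn)                \
        {                                                  \
                thread_ctors[nb_thread_ctors++] = fn;      \
        }                                                  \
        static void fn(void)

/* Called as the first thing from each newly created DPDK thread
 * (EAL thread loop, ctrl thread init, rte_thread_register()). */
static void
rte_thread_run_ctors(void)
{
        unsigned int i;

        for (i = 0; i < nb_thread_ctors; i++)
                thread_ctors[i]();
}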



Re: C++20 report error at file rte_spinlock.h

2022-12-19 Thread Tyler Retzlaff
On Tue, Dec 13, 2022 at 06:11:06AM +, Zhou, Xiangyun wrote:
> Dear dpdk dev,
> 
> I'm using DPDK 21.11 LTS. When compiling my program with the C++ flag 
> "-std=c++20", the compiler reports the errors below. After checking file 
> rte_spinlock.h, I think the error reported by the compiler is valid: there 
> is a potential issue when using the functions rte_spinlock_recursive_lock, 
> rte_spinlock_recursive_unlock and rte_spinlock_recursive_trylock from 
> multiple threads. We could either remove the "volatile" qualifier and ask 
> users to handle the multi-threading themselves, or use atomic operations 
> instead of plain increment and decrement.
> 
> 
> /home/dpdk/lib/eal/include/generic/rte_spinlock.h:221:12: error: increment of 
> object of volatile-qualified type 'volatile int' is deprecated 
> [-Werror,-Wdeprecated-volatile]
> slr->count++;
>   ^
> /home/dpdk/lib/eal/include/generic/rte_spinlock.h:231:6: error: decrement of 
> object of volatile-qualified type 'volatile int' is deprecated 
> [-Werror,-Wdeprecated-volatile]
> if (--(slr->count) == 0) {
> ^
> /home/dpdk/lib/eal/include/generic/rte_spinlock.h:255:12: error: increment of 
> object of volatile-qualified type 'volatile int' is deprecated 
> [-Werror,-Wdeprecated-volatile]
> slr->count++;
> 

I have work in progress to optionally use standard atomics, but in the
meantime the correct thing to do here is to use the GCC builtins that
match the requirements of the C++11 memory model.

The code should be converted to use __atomic_fetch_{add,sub} or
__atomic_{add,sub}_fetch as appropriate.

ty.
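
Concretely, the suggested interim fix would look something like the sketch
below for the recursive lock (relaxed ordering shown on the assumption
that the underlying spinlock already provides the acquire/release
semantics):

static inline void
rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr)
{
        int id = rte_gettid();

        if (slr->user != id) {
                rte_spinlock_lock(&slr->sl);
                slr->user = id;
        }
        /* was: slr->count++; ++/-- on volatile int is deprecated in C++20 */
        __atomic_fetch_add(&slr->count, 1, __ATOMIC_RELAXED);
}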


RE: C++20 report error at file rte_spinlock.h

2022-12-19 Thread Konstantin Ananyev


> On Tue, Dec 13, 2022 at 06:11:06AM +, Zhou, Xiangyun wrote:
> > Dear dpdk dev,
> >
> > I'm using DPDK 21.11 LTS. When compiling my program with the C++ flag
> > "-std=c++20", the compiler reports the errors below. After checking file
> > rte_spinlock.h, I think the error reported by the compiler is valid: there
> > is a potential issue when using the functions rte_spinlock_recursive_lock,
> > rte_spinlock_recursive_unlock and rte_spinlock_recursive_trylock from
> > multiple threads. We could either remove the "volatile" qualifier and ask
> > users to handle the multi-threading themselves, or use atomic operations
> > instead of plain increment and decrement.
> >
> >
> > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:221:12: error: increment 
> > of object of volatile-qualified type 'volatile int' is
> deprecated [-Werror,-Wdeprecated-volatile]
> > slr->count++;
> >   ^
> > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:231:6: error: decrement 
> > of object of volatile-qualified type 'volatile int' is
> deprecated [-Werror,-Wdeprecated-volatile]
> > if (--(slr->count) == 0) {
> > ^
> > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:255:12: error: increment 
> > of object of volatile-qualified type 'volatile int' is
> deprecated [-Werror,-Wdeprecated-volatile]
> > slr->count++;
> >
> 
> I have work in progress to optionally use standard atomics, but in the
> meantime the correct thing to do here is to use the GCC builtins that
> match the requirements of the C++11 memory model.
> 
> The code should be converted to use __atomic_fetch_{add,sub} or
> __atomic_{add,sub}_fetch as appropriate.
> 
> ty.

From looking at the code, I don't think this is necessary:
both 'user' and 'count' are supposed to be protected by 'sl'.
In fact, it looks safe just to remove the 'volatile' qualifier here.
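
A sketch of that alternative - a pure type change, on the assumption that
every access to these fields really is made under 'sl':

/* Sketch: drop the qualifiers and rely on the spinlock for ordering. */
typedef struct {
        rte_spinlock_t sl; /**< the actual spinlock */
        int user;          /**< core id using lock; was volatile int */
        int count;         /**< recursion count; was volatile int */
} rte_spinlock_recursive_t;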
 



RE: [PATCH v4 4/4] eventdev/timer: change eventdev reconfig logic

2022-12-19 Thread Carrillo, Erik G
Hi Harish,

Adding a couple of comments inline:

> -Original Message-
> From: Naga Harish K, S V 
> Sent: Monday, December 19, 2022 12:29 AM
> To: jer...@marvell.com; Carrillo, Erik G ; Gujjar,
> Abhinandan S 
> Cc: dev@dpdk.org; Jayatheerthan, Jay 
> Subject: [PATCH v4 4/4] eventdev/timer: change eventdev reconfig logic
> 
> When rte_event_timer_adapter_create() is used for creating adapter
> instance, eventdev is reconfigured with additional
> ``rte_event_dev_config::nb_event_ports`` parameter.
> 
> This eventdev reconfig logic is enhanced to increment the
> ``rte_event_dev_config::nb_single_link_event_port_queues``
> parameter if the adapter event port config is of type
> ``RTE_EVENT_PORT_CFG_SINGLE_LINK``.
> 
> With this change the application is no longer need to configure the eventdev
> with ``rte_event_dev_config::nb_single_link_event_port_queues``
> parameter required for timer adapter when the adapter is created using
> above mentioned api.
> 
> Signed-off-by: Naga Harish K S V 
> Acked-by: Abhinandan Gujjar 
> ---
> v2:
> * fix build error in documentation
> v3:
> * update doxygen
> v4:
> * fix programmer guide
> ---
> ---
>  doc/guides/prog_guide/event_timer_adapter.rst | 17 ++
>  lib/eventdev/rte_event_timer_adapter.c| 23 +++
>  lib/eventdev/rte_event_timer_adapter.h| 13 +++
>  3 files changed, 43 insertions(+), 10 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/event_timer_adapter.rst
> b/doc/guides/prog_guide/event_timer_adapter.rst
> index d7307a29bb..b457c879b0 100644
> --- a/doc/guides/prog_guide/event_timer_adapter.rst
> +++ b/doc/guides/prog_guide/event_timer_adapter.rst
> @@ -139,6 +139,23 @@ This function is passed a callback function that will be
> invoked if the  adapter needs to create an event port, giving the application
> the opportunity  to control how it is done.
> 
> +Event device configuration for service based adapter
> +

We can use '^' instead of '~' here to make it a subsection.

> +
> +When rte_event_timer_adapter_create() is used for creating adapter
> +instance, eventdev is reconfigured with additional
> +``rte_event_dev_config::nb_event_ports`` parameter.

How about something along the lines of:

"When rte_event_timer_adapter_create() is used to create an adapter instance, 
``rte_event_dev_config::nb_event_ports`` is automatically incremented, and the 
eventdev is reconfigured with the additional port."

> +This eventdev reconfig logic also increment the

"increments"

> +``rte_event_dev_config::nb_single_link_event_port_queues``
> +parameter if the adapter event port config is of type
> +``RTE_EVENT_PORT_CFG_SINGLE_LINK``.
> +
> +So the application is no longer need to configure the event device with

"application no longer needs"

> +``rte_event_dev_config::nb_event_ports`` and
> +``rte_event_dev_config::nb_single_link_event_port_queues``
> +parameters required for timer adapter when the adapter is created using
> +above mentioned api.
> +
>  Adapter modes
>  ^
>  An event timer adapter can be configured in either periodic or non-periodic
> mode diff --git a/lib/eventdev/rte_event_timer_adapter.c
> b/lib/eventdev/rte_event_timer_adapter.c
> index a0f14bf861..5ed233db00 100644
> --- a/lib/eventdev/rte_event_timer_adapter.c
> +++ b/lib/eventdev/rte_event_timer_adapter.c
> @@ -88,7 +88,20 @@ default_port_conf_cb(uint16_t id, uint8_t
> event_dev_id, uint8_t *event_port_id,
>   rte_event_dev_stop(dev_id);
> 
>   port_id = dev_conf.nb_event_ports;
> + if (conf_arg != NULL)
> + port_conf = conf_arg;
> + else {
> + port_conf = &def_port_conf;
> + ret = rte_event_port_default_conf_get(dev_id, port_id,
> +   port_conf);
> + if (ret < 0)
> + return ret;
> + }
> +
>   dev_conf.nb_event_ports += 1;
> + if (port_conf->event_port_cfg &
> RTE_EVENT_PORT_CFG_SINGLE_LINK)
> + dev_conf.nb_single_link_event_port_queues += 1;
> +
>   ret = rte_event_dev_configure(dev_id, &dev_conf);
>   if (ret < 0) {
>   EVTIM_LOG_ERR("failed to configure event dev %u\n",
> dev_id); @@ -99,16 +112,6 @@ default_port_conf_cb(uint16_t id, uint8_t
> event_dev_id, uint8_t *event_port_id,
>   return ret;
>   }
> 
> - if (conf_arg != NULL)
> - port_conf = conf_arg;
> - else {
> - port_conf = &def_port_conf;
> - ret = rte_event_port_default_conf_get(dev_id, port_id,
> -   port_conf);
> - if (ret < 0)
> - return ret;
> - }
> -
>   ret = rte_event_port_setup(dev_id, port_id, port_conf);
>   if (ret < 0) {
>   EVTIM_LOG_ERR("failed to setup event port %u on event
> dev %u\n", diff --git a/lib/eventdev/rte_event_timer_adapter.h
> b/lib/eventdev/r
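
For context, the application-side flow this change simplifies might look
like the sketch below (evdev_id is assumed to be an already-configured
event device; with the patch, the application no longer reserves the
adapter's port in its own rte_event_dev_configure() call):

#include <rte_eal.h>
#include <rte_event_timer_adapter.h>

struct rte_event_timer_adapter_conf conf = {
        .event_dev_id = evdev_id,
        .timer_adapter_id = 0,
        .clk_src = RTE_EVENT_TIMER_ADAPTER_CPU_CLK,
        .timer_tick_ns = 1000000,         /* 1 ms resolution */
        .max_tmo_ns = 10ULL * 1000000000, /* 10 s max timeout */
        .nb_timers = 1024,
        .flags = RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES,
};

/* The default port conf callback grows nb_event_ports (and, with this
 * patch, nb_single_link_event_port_queues) internally as needed. */
struct rte_event_timer_adapter *adapter =
        rte_event_timer_adapter_create(&conf);
if (adapter == NULL)
        rte_exit(EXIT_FAILURE, "failed to create timer adapter\n");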

Re: [PATCH] net/gve: add support for basic stats

2022-12-19 Thread Rushil Gupta
Hi all,
Josh just found some inconsistencies in the Tx/Rx statistics sums
for all ports. Not sure if we can include a screenshot here, but it
goes like this:
Tx-dropped for port0: 447034
Tx-dropped for port1: 0
Accumulated forward statistics for all ports: 807630

Please note that this issue is only with Tx-dropped (not Tx-packets/Tx-total).


On Wed, Dec 7, 2022 at 8:39 AM Stephen Hemminger
 wrote:
>
> On Wed, 7 Dec 2022 15:09:08 +
> Ferruh Yigit  wrote:
>
> > On 11/24/2022 7:33 AM, Junfeng Guo wrote:
> > > Add support for dev_ops of stats_get and stats_reset.
> > >
> > > Queue stats update will be moved into xstat [1], but the basic stats
> > > items may still be required. So we just keep the remaining ones and
> > > will implement the queue stats via xstats in the coming release.
> > >
> > > [1]
> > > https://elixir.bootlin.com/dpdk/v22.07/ \
> > > source/doc/guides/rel_notes/deprecation.rst#L118
> > >
> > > Signed-off-by: Xiaoyun Li 
> > > Signed-off-by: Junfeng Guo 
> >
> > <...>
> >
> > > +static int
> > > +gve_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> > > +{
> > > +   uint16_t i;
> > > +
> > > +   for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > > +   struct gve_tx_queue *txq = dev->data->tx_queues[i];
> > > +   if (txq == NULL)
> > > +   continue;
> > > +
> > > +   stats->opackets += txq->packets;
> > > +   stats->obytes += txq->bytes;
> > > +   stats->oerrors += txq->errors;
> >
> > Hi Junfeng, Qi, Jingjing, Beilei,
> >
> > Above logic looks wrong to me, did you test it?
> >
> > If the 'gve_dev_stats_get()' called multiple times (without stats reset
> > in between), same values will be keep added to stats.
> > Some hw based implementations does this, because reading from stats
> > registers automatically reset those registers but this shouldn't be case
> > for this driver.
> >
> > I expect it to be something like:
> >
> > local_stats = 0
> > foreach queue
> >   local_stats += queue->stats
> > stats = local_stats
>
> Zeroing local_stats would be unnecessary anyway.
> The only caller of the PMD stats_get op is rte_eth_stats_get(),
> and it zeros the stats structure before calling the PMD.
>
>
> int
> rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
> {
> struct rte_eth_dev *dev;
>
> RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> dev = &rte_eth_devices[port_id];
>
> memset(stats, 0, sizeof(*stats));
> ...
> stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
> return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
>
> If any PMD has extra memset in their stats get that could be removed.
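
Putting the two observations together, the driver callback can stay as
plain accumulation; a sketch of the resulting logic (Tx side shown, Rx is
symmetric; field names as in the patch under review):

static int
gve_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        uint16_t i;

        /* rte_eth_stats_get() memsets *stats before invoking the PMD op,
         * so accumulating into it is correct: nothing carries over
         * between calls and no local zeroing is needed. */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct gve_tx_queue *txq = dev->data->tx_queues[i];

                if (txq == NULL)
                        continue;
                stats->opackets += txq->packets;
                stats->obytes += txq->bytes;
                stats->oerrors += txq->errors;
        }
        return 0;
}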


[PATCH v2] eventdev/timer: add API to get remaining ticks

2022-12-19 Thread Erik Gabriel Carrillo
Introduce an event timer adapter API which allows users to determine how
many adapter ticks remain until an event timer fires.

Signed-off-by: Erik Gabriel Carrillo 
---
v2:
* Rename API to rte_event_timer_get_remaining_ticks
* Assert that API out param is non-NULL instead of checking and returning
  error

 app/test/test_event_timer_adapter.c| 68 ++
 lib/eventdev/event_timer_adapter_pmd.h |  7 +++
 lib/eventdev/rte_event_timer_adapter.c | 52 
 lib/eventdev/rte_event_timer_adapter.h | 27 ++
 lib/eventdev/version.map   |  3 ++
 5 files changed, 157 insertions(+)

diff --git a/app/test/test_event_timer_adapter.c 
b/app/test/test_event_timer_adapter.c
index 1a440dfd10..1a1fb48b24 100644
--- a/app/test/test_event_timer_adapter.c
+++ b/app/test/test_event_timer_adapter.c
@@ -1920,6 +1920,72 @@ adapter_create_max(void)
return TEST_SUCCESS;
 }
 
+static inline int
+test_timer_ticks_remaining(void)
+{
+   uint64_t ticks_remaining = UINT64_MAX;
+   struct rte_event_timer *ev_tim;
+   struct rte_event ev;
+   int ret, i;
+   const struct rte_event_timer tim = {
+   .ev.op = RTE_EVENT_OP_NEW,
+   .ev.queue_id = 0,
+   .ev.sched_type = RTE_SCHED_TYPE_ATOMIC,
+   .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+   .ev.event_type =  RTE_EVENT_TYPE_TIMER,
+   .state = RTE_EVENT_TIMER_NOT_ARMED,
+   };
+
+   rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim);
+   *ev_tim = tim;
+   ev_tim->ev.event_ptr = ev_tim;
+#define TEST_TICKS 5
+   ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS);
+
+   /* Test that unarmed timer returns error */
+   TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+&ticks_remaining),
+"Didn't fail to get ticks for unarmed event timer");
+
+   TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev, &ev_tim, 1), 1,
+ "Failed to arm timer with proper timeout.");
+   TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED,
+ "Improper timer state set expected %d returned %d",
+ RTE_EVENT_TIMER_ARMED, ev_tim->state);
+
+   for (i = 0; i < TEST_TICKS; i++) {
+   ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+ &ticks_remaining);
+   if (ret < 0)
+   return TEST_FAILED;
+
+   TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i,
+ "Expected %d ticks remaining, got %"PRIu64"",
+ TEST_TICKS - i, ticks_remaining);
+
+   rte_delay_ms(100);
+   }
+
+   rte_delay_ms(100);
+
+   TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1, 0), 1,
+ "Armed timer failed to trigger.");
+   TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_NOT_ARMED,
+ "Improper timer state set expected %d returned %d",
+ RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state);
+
+   /* Test that timer that fired returns error */
+   TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+  &ticks_remaining),
+"Didn't fail to get ticks for unarmed event timer");
+
+   rte_mempool_put(eventdev_test_mempool, (void *)ev_tim);
+
+#undef TEST_TICKS
+   return TEST_SUCCESS;
+}
+
+
 static struct unit_test_suite event_timer_adptr_functional_testsuite  = {
.suite_name = "event timer functional test suite",
.setup = testsuite_setup,
@@ -1982,6 +2048,8 @@ static struct unit_test_suite 
event_timer_adptr_functional_testsuite  = {
TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
adapter_tick_resolution),
TEST_CASE(adapter_create_max),
+   TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
+   test_timer_ticks_remaining),
TEST_CASES_END() /**< NULL terminate unit test array */
}
 };
diff --git a/lib/eventdev/event_timer_adapter_pmd.h 
b/lib/eventdev/event_timer_adapter_pmd.h
index 189017b5c1..7ba9df463b 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -52,6 +52,11 @@ typedef int (*rte_event_timer_adapter_stats_get_t)(
 typedef int (*rte_event_timer_adapter_stats_reset_t)(
const struct rte_event_timer_adapter *adapter);
 /**< @internal Reset statistics for event timer adapter */
+typedef int (*rte_event_timer_get_remaining_ticks_t)(
+   const struct rte_event_timer_adapter *adapter,
+   const struct rte_event_timer *evtim,
+   uint64_t *ticks_remaining);
+/*

RE: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks

2022-12-19 Thread Pavan Nikhilesh Bhagavatula



> -Original Message-
> From: Erik Gabriel Carrillo 
> Sent: Tuesday, December 20, 2022 1:29 AM
> To: Jerin Jacob Kollanukkaran 
> Cc: hof...@lysator.liu.se; s.v.naga.haris...@intel.com;
> jay.jayatheert...@intel.com; Pavan Nikhilesh Bhagavatula
> ; Shijith Thotton ;
> dev@dpdk.org
> Subject: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks
> 
> External Email
> 
> --
> Introduce an event timer adapter API which allows users to determine how
> many adapter ticks remain until an event timer fires.
> 
> Signed-off-by: Erik Gabriel Carrillo 
> ---
> v2:
> * Rename API to rte_event_timer_get_remaining_ticks
> * Assert that API out param is non-NULL instead of checking and returning
>   error
> 
>  app/test/test_event_timer_adapter.c| 68
> ++
>  lib/eventdev/event_timer_adapter_pmd.h |  7 +++
>  lib/eventdev/rte_event_timer_adapter.c | 52 
>  lib/eventdev/rte_event_timer_adapter.h | 27 ++
>  lib/eventdev/version.map   |  3 ++
>  5 files changed, 157 insertions(+)
> 
> diff --git a/app/test/test_event_timer_adapter.c
> b/app/test/test_event_timer_adapter.c
> index 1a440dfd10..1a1fb48b24 100644
> --- a/app/test/test_event_timer_adapter.c
> +++ b/app/test/test_event_timer_adapter.c
> @@ -1920,6 +1920,72 @@ adapter_create_max(void)
>   return TEST_SUCCESS;
>  }
> 
> +static inline int
> +test_timer_ticks_remaining(void)
> +{
> + uint64_t ticks_remaining = UINT64_MAX;
> + struct rte_event_timer *ev_tim;
> + struct rte_event ev;
> + int ret, i;
> + const struct rte_event_timer tim = {
> + .ev.op = RTE_EVENT_OP_NEW,
> + .ev.queue_id = 0,
> + .ev.sched_type = RTE_SCHED_TYPE_ATOMIC,
> + .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
> + .ev.event_type =  RTE_EVENT_TYPE_TIMER,
> + .state = RTE_EVENT_TIMER_NOT_ARMED,
> + };
> +
> + rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim);
> + *ev_tim = tim;
> + ev_tim->ev.event_ptr = ev_tim;
> +#define TEST_TICKS 5
> + ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS);
> +
> + /* Test that unarmed timer returns error */
> + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev,
> ev_tim,
> +  &ticks_remaining),
> +  "Didn't fail to get ticks for unarmed event timer");

Please handle the ENOTSUP case.

> +
> + TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev,
> &ev_tim, 1), 1,
> +   "Failed to arm timer with proper timeout.");
> + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED,
> +   "Improper timer state set expected %d returned
> %d",
> +   RTE_EVENT_TIMER_ARMED, ev_tim->state);
> +
> + for (i = 0; i < TEST_TICKS; i++) {
> + ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim,
> +   &ticks_remaining);
> + if (ret < 0)
> + return TEST_FAILED;
> +
> + TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i,
> +   "Expected %d ticks remaining, got
> %"PRIu64"",
> +   TEST_TICKS - i, ticks_remaining);
> +
> + rte_delay_ms(100);
> + }
> +
> + rte_delay_ms(100);
> +
> + TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1,
> 0), 1,
> +   "Armed timer failed to trigger.");
> + TEST_ASSERT_EQUAL(ev_tim->state,
> RTE_EVENT_TIMER_NOT_ARMED,
> +   "Improper timer state set expected %d returned
> %d",
> +   RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state);
> +
> + /* Test that timer that fired returns error */
> + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev,
> ev_tim,
> +&ticks_remaining),
> +  "Didn't fail to get ticks for unarmed event timer");
> +
> + rte_mempool_put(eventdev_test_mempool, (void *)ev_tim);
> +
> +#undef TEST_TICKS
> + return TEST_SUCCESS;
> +}
> +
> +
>  static struct unit_test_suite event_timer_adptr_functional_testsuite  = {
>   .suite_name = "event timer functional test suite",
>   .setup = testsuite_setup,
> @@ -1982,6 +2048,8 @@ static struct unit_test_suite
> event_timer_adptr_functional_testsuite  = {
>   TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
>   adapter_tick_resolution),
>   TEST_CASE(adapter_create_max),
> + TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
> + test_timer_ticks_remaining),
>   TEST_CASES_END() /**< NULL terminate unit test array */
>   }
>  };
> diff --git a/lib/eventdev/event_timer_adapter_pmd.h
> b/lib/eventdev/event_timer_a

[PATCH v3] eventdev/timer: add API to get remaining ticks

2022-12-19 Thread Erik Gabriel Carrillo
Introduce an event timer adapter API which allows users to determine how
many adapter ticks remain until an event timer fires.

Signed-off-by: Erik Gabriel Carrillo 
---
v3:
* Handle ENOTSUP case in unit test

v2:
* Rename API to rte_event_timer_get_remaining_ticks
* Assert that API out param is non-NULL instead of checking and returning
  error

 app/test/test_event_timer_adapter.c| 75 ++
 lib/eventdev/event_timer_adapter_pmd.h |  7 +++
 lib/eventdev/rte_event_timer_adapter.c | 52 ++
 lib/eventdev/rte_event_timer_adapter.h | 27 ++
 lib/eventdev/version.map   |  3 ++
 5 files changed, 164 insertions(+)

diff --git a/app/test/test_event_timer_adapter.c 
b/app/test/test_event_timer_adapter.c
index 1a440dfd10..6241a70597 100644
--- a/app/test/test_event_timer_adapter.c
+++ b/app/test/test_event_timer_adapter.c
@@ -1920,6 +1920,79 @@ adapter_create_max(void)
return TEST_SUCCESS;
 }
 
+static inline int
+test_timer_ticks_remaining(void)
+{
+   uint64_t ticks_remaining = UINT64_MAX;
+   struct rte_event_timer *ev_tim;
+   struct rte_event ev;
+   int ret, i;
+   const struct rte_event_timer tim = {
+   .ev.op = RTE_EVENT_OP_NEW,
+   .ev.queue_id = 0,
+   .ev.sched_type = RTE_SCHED_TYPE_ATOMIC,
+   .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+   .ev.event_type =  RTE_EVENT_TYPE_TIMER,
+   .state = RTE_EVENT_TIMER_NOT_ARMED,
+   };
+
+   rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim);
+   *ev_tim = tim;
+   ev_tim->ev.event_ptr = ev_tim;
+#define TEST_TICKS 5
+   ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS);
+
+   ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+ &ticks_remaining);
+   if (ret == -ENOTSUP) {
+   rte_mempool_put(eventdev_test_mempool, (void *)ev_tim);
+   printf("API not supported, skipping test\n");
+   return TEST_SKIPPED;
+   }
+
+   /* Test that unarmed timer returns error */
+   TEST_ASSERT_FAIL(ret,
+"Didn't fail to get ticks for unarmed event timer");
+
+   TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev, &ev_tim, 1), 1,
+ "Failed to arm timer with proper timeout.");
+   TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED,
+ "Improper timer state set expected %d returned %d",
+ RTE_EVENT_TIMER_ARMED, ev_tim->state);
+
+   for (i = 0; i < TEST_TICKS; i++) {
+   ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+ &ticks_remaining);
+   if (ret < 0)
+   return TEST_FAILED;
+
+   TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i,
+ "Expected %d ticks remaining, got %"PRIu64"",
+ TEST_TICKS - i, ticks_remaining);
+
+   rte_delay_ms(100);
+   }
+
+   rte_delay_ms(100);
+
+   TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1, 0), 1,
+ "Armed timer failed to trigger.");
+   TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_NOT_ARMED,
+ "Improper timer state set expected %d returned %d",
+ RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state);
+
+   /* Test that timer that fired returns error */
+   TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim,
+  &ticks_remaining),
+"Didn't fail to get ticks for unarmed event timer");
+
+   rte_mempool_put(eventdev_test_mempool, (void *)ev_tim);
+
+#undef TEST_TICKS
+   return TEST_SUCCESS;
+}
+
+
 static struct unit_test_suite event_timer_adptr_functional_testsuite  = {
.suite_name = "event timer functional test suite",
.setup = testsuite_setup,
@@ -1982,6 +2055,8 @@ static struct unit_test_suite 
event_timer_adptr_functional_testsuite  = {
TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
adapter_tick_resolution),
TEST_CASE(adapter_create_max),
+   TEST_CASE_ST(timdev_setup_msec, timdev_teardown,
+   test_timer_ticks_remaining),
TEST_CASES_END() /**< NULL terminate unit test array */
}
 };
diff --git a/lib/eventdev/event_timer_adapter_pmd.h 
b/lib/eventdev/event_timer_adapter_pmd.h
index 189017b5c1..7ba9df463b 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -52,6 +52,11 @@ typedef int (*rte_event_timer_adapter_stats_get_t)(
 typedef int (*rte_event_timer_adapter_stats_reset_t)(
const struct rte_event_timer_adapter *adapter);
 /**< @internal Re
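
Usage of the proposed API is straightforward; a sketch, assuming 'adapter'
and an armed 'evtim' already exist and <inttypes.h> is included:

uint64_t ticks_remaining;
int ret;

ret = rte_event_timer_get_remaining_ticks(adapter, evtim, &ticks_remaining);
if (ret == -ENOTSUP)
        printf("PMD does not support the remaining-ticks query\n");
else if (ret < 0)
        printf("timer is not armed (or has already fired)\n");
else
        printf("%" PRIu64 " adapter ticks until expiry\n", ticks_remaining);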

RE: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks

2022-12-19 Thread Carrillo, Erik G
<... snipped ...>

> > diff --git a/app/test/test_event_timer_adapter.c
> > b/app/test/test_event_timer_adapter.c
> > index 1a440dfd10..1a1fb48b24 100644
> > --- a/app/test/test_event_timer_adapter.c
> > +++ b/app/test/test_event_timer_adapter.c
> > @@ -1920,6 +1920,72 @@ adapter_create_max(void)
> > return TEST_SUCCESS;
> >  }
> >
> > +static inline int
> > +test_timer_ticks_remaining(void)
> > +{
> > +   uint64_t ticks_remaining = UINT64_MAX;
> > +   struct rte_event_timer *ev_tim;
> > +   struct rte_event ev;
> > +   int ret, i;
> > +   const struct rte_event_timer tim = {
> > +   .ev.op = RTE_EVENT_OP_NEW,
> > +   .ev.queue_id = 0,
> > +   .ev.sched_type = RTE_SCHED_TYPE_ATOMIC,
> > +   .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
> > +   .ev.event_type =  RTE_EVENT_TYPE_TIMER,
> > +   .state = RTE_EVENT_TIMER_NOT_ARMED,
> > +   };
> > +
> > +   rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim);
> > +   *ev_tim = tim;
> > +   ev_tim->ev.event_ptr = ev_tim;
> > +#define TEST_TICKS 5
> > +   ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS);
> > +
> > +   /* Test that unarmed timer returns error */
> > +   TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev,
> > ev_tim,
> > +&ticks_remaining),
> > +"Didn't fail to get ticks for unarmed event timer");
> 
> Please handle the ENOTSUP case.
> 

Good catch - updated in v3.

Thanks,
Erik

<... snipped ...>


[PATCH] net/iavf: add lock for vf commands

2022-12-19 Thread Mike Pattrick
iavf admin queue commands aren't thread-safe. Bugs surrounding this
issue can manifest in a variety of ways, but frequently pend_cmd is
overwritten. Simultaneously executing commands can result in a
misconfigured device or in DPDK sleeping in a thread for 2 seconds.

Despite this limitation, vf commands may be executed from both
iavf_dev_alarm_handler() in a control thread and the application's main
thread. This is trivial to reproduce in the testpmd application by
creating a bond of VFs in active-backup mode and then turning the
bond off and on again repeatedly.

Previously [1] was proposed as a potential solution, but this commit
has been reverted. I propose adding locks until a more complete
solution is available.

[1] commit cb5c1b91f76f ("net/iavf: add thread for event callbacks")

Fixes: 48de41ca11f0 ("net/avf: enable link status update")
Fixes: 84108425054a ("net/iavf: support asynchronous virtual channel message")
Cc: sta...@dpdk.org

Signed-off-by: Mike Pattrick 
---
 drivers/net/iavf/iavf.h   |  1 +
 drivers/net/iavf/iavf_vchnl.c | 76 +++
 2 files changed, 77 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 1edebab8dc..aa18650ffa 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -262,6 +262,7 @@ struct iavf_info {
struct iavf_qv_map *qv_map; /* queue vector mapping */
struct iavf_flow_list flow_list;
rte_spinlock_t flow_ops_lock;
+   rte_spinlock_t aq_lock;
struct iavf_parser_list rss_parser_list;
struct iavf_parser_list dist_parser_list;
struct iavf_parser_list ipsec_crypto_parser_list;
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index f92daf97f2..ca435dee2e 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -554,7 +554,9 @@ iavf_enable_vlan_strip(struct iavf_adapter *adapter)
args.in_args_size = 0;
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
+   rte_spinlock_lock(&vf->aq_lock);
ret = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (ret)
PMD_DRV_LOG(ERR, "Failed to execute command of"
" OP_ENABLE_VLAN_STRIPPING");
@@ -575,7 +577,9 @@ iavf_disable_vlan_strip(struct iavf_adapter *adapter)
args.in_args_size = 0;
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
+   rte_spinlock_lock(&vf->aq_lock);
ret = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (ret)
PMD_DRV_LOG(ERR, "Failed to execute command of"
" OP_DISABLE_VLAN_STRIPPING");
@@ -604,7 +608,9 @@ iavf_check_api_version(struct iavf_adapter *adapter)
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
 
+   rte_spinlock_lock(&vf->aq_lock);
err = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (err) {
PMD_INIT_LOG(ERR, "Fail to execute command of OP_VERSION");
return err;
@@ -665,7 +671,9 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
args.in_args = (uint8_t *)&caps;
args.in_args_size = sizeof(caps);
 
+   rte_spinlock_lock(&vf->aq_lock);
err = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
 
if (err) {
PMD_DRV_LOG(ERR,
@@ -710,7 +718,9 @@ iavf_get_supported_rxdid(struct iavf_adapter *adapter)
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
 
+   rte_spinlock_lock(&vf->aq_lock);
ret = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (ret) {
PMD_DRV_LOG(ERR,
"Failed to execute command of 
OP_GET_SUPPORTED_RXDIDS");
@@ -754,7 +764,9 @@ iavf_config_vlan_strip_v2(struct iavf_adapter *adapter, 
bool enable)
args.in_args_size = sizeof(vlan_strip);
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
+   rte_spinlock_lock(&vf->aq_lock);
ret = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (ret)
PMD_DRV_LOG(ERR, "fail to execute command %s",
enable ? "VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2" :
@@ -794,7 +806,9 @@ iavf_config_vlan_insert_v2(struct iavf_adapter *adapter, 
bool enable)
args.in_args_size = sizeof(vlan_insert);
args.out_buffer = vf->aq_resp;
args.out_size = IAVF_AQ_BUF_SZ;
+   rte_spinlock_lock(&vf->aq_lock);
ret = iavf_execute_vf_cmd(adapter, &args, 0);
+   rte_spinlock_unlock(&vf->aq_lock);
if (ret)
PMD_DRV_LOG(ERR, "fail to execute command %s",
enable ? "VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2" :
@@ -837,
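
Since the lock/execute/unlock triplet repeats at every call site in this
patch, one possible follow-up is a small wrapper; a sketch
(iavf_execute_vf_cmd_safe is a hypothetical helper, not part of this
patch):

/* Hypothetical helper: serialize admin queue commands on vf->aq_lock. */
static int
iavf_execute_vf_cmd_safe(struct iavf_adapter *adapter,
                         struct iavf_cmd_info *args, int async)
{
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
        int ret;

        rte_spinlock_lock(&vf->aq_lock);
        ret = iavf_execute_vf_cmd(adapter, args, async);
        rte_spinlock_unlock(&vf->aq_lock);

        return ret;
}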

[PATCH 0/3] Async vhost packed ring optimization

2022-12-19 Thread Cheng Jiang
To improve the performance of the async vhost packed ring, we remove the
unnecessary data copy in the async vhost packed ring and add a batch
data path to both the enqueue and dequeue paths.

Cheng Jiang (3):
  vhost: remove redundant copy for packed shadow used ring
  vhost: add batch enqueue in async vhost packed ring
  vhost: add batch dequeue in async vhost packed ring

 lib/vhost/virtio_net.c | 393 +
 1 file changed, 355 insertions(+), 38 deletions(-)

--
2.35.1



[PATCH 1/3] vhost: remove redundant copy for packed shadow used ring

2022-12-19 Thread Cheng Jiang
In the packed ring enqueue data path of the current asynchronous
Vhost design, the shadow used ring is first copied to the sync
shadow used ring and then moved to the async shadow used ring, for
historical reasons. This is completely unnecessary. This patch
removes the redundant copy of the shadow used ring; the async
shadow used ring is now updated directly.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 66 --
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 9abf752f30..7c3ec128a0 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -572,6 +572,26 @@ vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
}
 }
 
+static __rte_always_inline void
+vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq,
+  uint32_t *len,
+  uint16_t *id,
+  uint16_t *count,
+  uint16_t num_buffers)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   for (i = 0; i < num_buffers; i++) {
+   async->buffers_packed[async->buffer_idx_packed].id  = id[i];
+   async->buffers_packed[async->buffer_idx_packed].len = len[i];
+   async->buffers_packed[async->buffer_idx_packed].count = 
count[i];
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -1647,23 +1667,6 @@ store_dma_desc_info_split(struct vring_used_elem 
*s_ring, struct vring_used_elem
}
 }
 
-static __rte_always_inline void
-store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
-   struct vring_used_elem_packed *d_ring,
-   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
-{
-   size_t elem_size = sizeof(struct vring_used_elem_packed);
-
-   if (d_idx + count <= ring_size) {
-   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
-   } else {
-   uint16_t size = ring_size - d_idx;
-
-   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
-   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
-   }
-}
-
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t 
vchan_id)
@@ -1822,7 +1825,8 @@ vhost_enqueue_async_packed(struct virtio_net *dev,
if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, 
true) < 0))
return -1;
 
-   vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, 
buffer_desc_count, *nr_buffers);
+   vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id,
+   buffer_desc_count, *nr_buffers);
 
return 0;
 }
@@ -1852,6 +1856,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, 
uint16_t slot_idx,
 {
uint16_t descs_err = 0;
uint16_t buffers_err = 0;
+   struct vhost_async *async = vq->async;
struct async_inflight_info *pkts_info = vq->async->pkts_info;
 
*pkt_idx -= nr_err;
@@ -1869,7 +1874,10 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, 
uint16_t slot_idx,
vq->avail_wrap_counter ^= 1;
}
 
-   vq->shadow_used_idx -= buffers_err;
+   if (async->buffer_idx_packed >= buffers_err)
+   async->buffer_idx_packed -= buffers_err;
+   else
+   async->buffer_idx_packed = async->buffer_idx_packed + vq->size 
- buffers_err;
 }
 
 static __rte_noinline uint32_t
@@ -1921,23 +1929,11 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev, struct vhost_virtqueue
dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);
}
 
-   if (likely(vq->shadow_used_idx)) {
-   /* keep used descriptors. */
-   store_dma_desc_info_packed(vq->shadow_used_packed, 
async->buffers_packed,
-   vq->size, 0, async->buffer_idx_packed,
-   vq->shadow_used_idx);
-
-   async->buffer_idx_packed += vq->shadow_used_idx;
-   if (async->buffer_idx_packed >= vq->size)
-   async->buffer_idx_packed -= vq->size;
-
-   async->pkts_idx += pkt_idx;
-   if (async->pkts_idx >= vq->size)
-   async->pkts_idx -= vq->size;
+   async->pkts_idx += pkt_idx;
+   if (async->pkts_idx >= vq->size)
+   async->pkts_idx -= vq->size;
 
-   vq->sh

[PATCH 2/3] vhost: add batch enqueue in async vhost packed ring

2022-12-19 Thread Cheng Jiang
Add a batch enqueue function to the asynchronous vhost packed ring to
improve performance. Chained mbufs are not supported; they will
be handled by the single enqueue function.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 157 +
 1 file changed, 157 insertions(+)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 7c3ec128a0..ac8c404327 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -432,6 +432,24 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
 }
 
+static __rte_always_inline void
+vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq,
+uint64_t *lens,
+uint16_t *ids)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
+   async->buffers_packed[async->buffer_idx_packed].len = lens[i];
+   async->buffers_packed[async->buffer_idx_packed].count = 1;
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
  uint16_t id)
@@ -1451,6 +1469,58 @@ virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
return 0;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens,
+  int16_t dma_id,
+  uint16_t vchan_id)
+{
+   bool wrap_counter = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   uint16_t i;
+
+   if (unlikely(avail_idx & PACKED_BATCH_MASK))
+   return -1;
+
+   if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->next != NULL))
+   return -1;
+   if (unlikely(!desc_is_avail(&descs[avail_idx + i],
+   wrap_counter)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   lens[i] = descs[avail_idx + i].len;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   desc_addrs[i] =  descs[avail_idx + i].addr;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(!desc_addrs[i]))
+   return -1;
+   if (unlikely(lens[i] != descs[avail_idx + i].len))
+   return -1;
+   }
+
+   if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
+   return -1;
+
+   return 0;
+}
+
 static __rte_always_inline void
 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -1850,6 +1920,78 @@ virtio_dev_rx_async_packed(struct virtio_net *dev, 
struct vhost_virtqueue *vq,
return 0;
 }
 
+static __rte_always_inline void
+virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens)
+{
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
+   struct vring_packed_desc *descs = vq->desc_packed;
+   struct vhost_async *async = vq->async;
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t mbuf_offset = 0;
+   uint16_t ids[PACKED_BATCH_SIZE];
+   uint64_t mapped_len[PACKED_BATCH_SIZE];
+   void *host_iova[PACKED_BATCH_SIZE];
+   uintptr_t desc;
+   uint16_t i;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+   desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], 
VHOST_ACCESS_RW);
+   hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc;
+   lens[i] = pkts[i]->pkt_len +
+   sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   }
+
+   vhost_for_

[PATCH 3/3] vhost: add batch dequeue in async vhost packed ring

2022-12-19 Thread Cheng Jiang
Add a batch dequeue function to the asynchronous vhost packed ring to
improve performance. Chained mbufs are not supported; they will
be handled by the single dequeue function.

Signed-off-by: Cheng Jiang 
Signed-off-by: Yuan Wang 
---
 lib/vhost/virtio_net.c | 170 -
 1 file changed, 167 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index ac8c404327..9cd69fc7bf 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -450,6 +450,23 @@ vhost_async_shadow_enqueue_packed_batch(struct 
vhost_virtqueue *vq,
}
 }
 
+static __rte_always_inline void
+vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t 
*ids)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
+   async->buffers_packed[async->buffer_idx_packed].len = 0;
+   async->buffers_packed[async->buffer_idx_packed].count = 1;
+
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
  uint16_t id)
@@ -3193,6 +3210,80 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
return -1;
 }
 
+static __rte_always_inline int
+vhost_async_tx_batch_packed_check(struct virtio_net *dev,
+struct vhost_virtqueue *vq,
+struct rte_mbuf **pkts,
+uint16_t avail_idx,
+uintptr_t *desc_addrs,
+uint64_t *lens,
+uint16_t *ids,
+int16_t dma_id,
+uint16_t vchan_id)
+{
+   bool wrap = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint64_t buf_lens[PACKED_BATCH_SIZE];
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   uint16_t flags, i;
+
+   if (unlikely(avail_idx & PACKED_BATCH_MASK))
+   return -1;
+   if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   flags = descs[avail_idx + i].flags;
+   if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
+(wrap == !!(flags & VRING_DESC_F_USED))  ||
+(flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
+   return -1;
+   }
+
+   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   lens[i] = descs[avail_idx + i].len;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   desc_addrs[i] = descs[avail_idx + i].addr;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(!desc_addrs[i]))
+   return -1;
+   if (unlikely((lens[i] != descs[avail_idx + i].len)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
+   goto err;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
+   goto err;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   pkts[i]->pkt_len = lens[i] - buf_offset;
+   pkts[i]->data_len = pkts[i]->pkt_len;
+   ids[i] = descs[avail_idx + i].id;
+   }
+
+   if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
+   return -1;
+
+   return 0;
+
+err:
+   return -1;
+}
+
 static __rte_always_inline int
 virtio_dev_tx_batch_packed(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -3769,16 +3860,74 @@ virtio_dev_tx_async_single_packed(struct virtio_net 
*dev,
return err;
 }
 
+static __rte_always_inline int
+virtio_dev_tx_async_packed_batch(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts, uint16_t slot_idx,
+  uint16_t dma_id, uint16_t vchan_id)
+{
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   struct vhost_async *async = vq->async;
+   struct async_inflig

[PATCH] app/dma-perf: introduce dma-perf application

2022-12-19 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios, as well as the performance of CPUs in the same scenarios,
for example with different buffer lengths. Only then is the target
performance of a DMA-accelerated application known. This patch
introduces a high-performance testing tool which automatically compares
the performance of CPU and DMA in different scenarios driven by a
pre-set config file. Memory copy performance tests are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 539 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 419 ++
 app/test-dma-perf/main.h  |  51 
 app/test-dma-perf/meson.build |  16 +
 7 files changed, 1099 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..a060ad2725 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -28,6 +28,7 @@ apps = [
 'test-regex',
 'test-sad',
 'test-security-perf',
+'test-dma-perf',
 ]
 
 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..f6f7cc9ed3
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,539 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+uint16_t dmadev_ids[MAX_WORKER_NB];
+uint32_t nb_dmadevs;
+
+extern char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 * 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+   nr_buf,
+   memory,
+   rte_get_timer_hz());
+   else
+   printf("lcore %u\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 "

[PATCH 0/3] net/igc: support PTP timesync

2022-12-19 Thread Simei Su
[PATCH 1/3] code refactoring.
[PATCH 2/3] add related definitions for ptp timesync.
[PATCH 3/3] add IEEE1588 API to support timesync.

Simei Su (3):
  net/igc: code refactoring
  net/igc/base: support PTP timesync
  net/igc: support IEEE 1588 PTP

 drivers/net/igc/base/igc_defines.h |  11 ++
 drivers/net/igc/igc_ethdev.c   | 222 +
 drivers/net/igc/igc_ethdev.h   |   4 +-
 drivers/net/igc/igc_txrx.c | 166 ---
 drivers/net/igc/igc_txrx.h | 116 +++
 5 files changed, 397 insertions(+), 122 deletions(-)

-- 
2.9.5



[PATCH 1/3] net/igc: code refactoring

2022-12-19 Thread Simei Su
Move the Rx/Tx queue related structures from igc_txrx.c to igc_txrx.h
to make the code cleaner and these structures easier to use elsewhere.

Signed-off-by: Simei Su 
---
 drivers/net/igc/igc_txrx.c | 118 -
 drivers/net/igc/igc_txrx.h | 115 +++
 2 files changed, 115 insertions(+), 118 deletions(-)

diff --git a/drivers/net/igc/igc_txrx.c b/drivers/net/igc/igc_txrx.c
index ffd219b..c462e91 100644
--- a/drivers/net/igc/igc_txrx.c
+++ b/drivers/net/igc/igc_txrx.c
@@ -93,124 +93,6 @@
 
 #define IGC_TX_OFFLOAD_NOTSUP_MASK (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct igc_rx_entry {
-   struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct igc_rx_queue {
-   struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
-   volatile union igc_adv_rx_desc *rx_ring;
-   /**< RX ring virtual address. */
-   uint64_trx_ring_phys_addr; /**< RX ring DMA address. */
-   volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-   volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
-   struct igc_rx_entry *sw_ring;   /**< address of RX software ring. */
-   struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-   struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
-   uint16_tnb_rx_desc; /**< number of RX descriptors. */
-   uint16_trx_tail;/**< current value of RDT register. */
-   uint16_tnb_rx_hold; /**< number of held free RX desc. */
-   uint16_trx_free_thresh; /**< max free RX desc to hold. */
-   uint16_tqueue_id;   /**< RX queue index. */
-   uint16_treg_idx;/**< RX queue register index. */
-   uint16_tport_id;/**< Device port identifier. */
-   uint8_t pthresh;/**< Prefetch threshold register. */
-   uint8_t hthresh;/**< Host threshold register. */
-   uint8_t wthresh;/**< Write-back threshold register. */
-   uint8_t crc_len;/**< 0 if CRC stripped, 4 otherwise. */
-   uint8_t drop_en;/**< If not 0, set SRRCTL.Drop_En. */
-   uint32_tflags;  /**< RX flags. */
-   uint64_toffloads;   /**< offloads of RTE_ETH_RX_OFFLOAD_* */
-};
-
-/** Offload features */
-union igc_tx_offload {
-   uint64_t data;
-   struct {
-   uint64_t l3_len:9; /**< L3 (IP) Header Length. */
-   uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
-   uint64_t vlan_tci:16;
-   /**< VLAN Tag Control Identifier(CPU order). */
-   uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
-   uint64_t tso_segsz:16; /**< TCP TSO segment size. */
-   /* uint64_t unused:8; */
-   };
-};
-
-/*
- * Compare mask for igc_tx_offload.data,
- * should be in sync with igc_tx_offload layout.
- */
-#define TX_MACIP_LEN_CMP_MASK  0xULL /**< L2L3 header mask. */
-#define TX_VLAN_CMP_MASK   0xULL /**< Vlan mask. */
-#define TX_TCP_LEN_CMP_MASK0x00FFULL /**< TCP header mask. */
-#define TX_TSO_MSS_CMP_MASK0x0000ULL /**< TSO segsz mask. */
-/** Mac + IP + TCP + Mss mask. */
-#define TX_TSO_CMP_MASK\
-   (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
-
-/**
- * Structure to check if new context need be built
- */
-struct igc_advctx_info {
-   uint64_t flags;   /**< ol_flags related to context build. */
-   /** tx offload: vlan, tso, l2-l3-l4 lengths. */
-   union igc_tx_offload tx_offload;
-   /** compare mask for tx offload. */
-   union igc_tx_offload tx_offload_mask;
-};
-
-/**
- * Hardware context number
- */
-enum {
-   IGC_CTX_0= 0, /**< CTX0*/
-   IGC_CTX_1= 1, /**< CTX1*/
-   IGC_CTX_NUM  = 2, /**< CTX_NUM */
-};
-
-/**
- * Structure associated with each descriptor of the TX ring of a TX queue.
- */
-struct igc_tx_entry {
-   struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
-   uint16_t next_id; /**< Index of next descriptor in ring. */
-   uint16_t last_id; /**< Index of last scattered descriptor. */
-};
-
-/**
- * Structure associated with each TX queue.
- */
-struct igc_tx_queue {
-   volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
-   uint64_t   tx_ring_phys_addr; /**< TX ring DMA address. */
-   struct igc_tx_entry*sw_ring; /**< virtual address of SW ring. */
-   volatile uint32_t  *tdt_reg_addr; /**< Address of TDT register. */
-   uint32_t   txd_type;  /**< Device-specific TXD t

[PATCH 2/3] net/igc/base: support PTP timesync

2022-12-19 Thread Simei Su
Add the register definitions needed to enable PTP timesync.

Signed-off-by: Simei Su 
---
 drivers/net/igc/base/igc_defines.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/igc/base/igc_defines.h b/drivers/net/igc/base/igc_defines.h
index 61964bc..dd7330a 100644
--- a/drivers/net/igc/base/igc_defines.h
+++ b/drivers/net/igc/base/igc_defines.h
@@ -795,6 +795,17 @@
 
 #define TSYNC_INTERRUPTS   TSINTR_TXTS
 
+/* Split Replication Receive Control */
+#define IGC_SRRCTL_TIMESTAMP   0x4000
+#define IGC_SRRCTL_TIMER1SEL(timer)(((timer) & 0x3) << 14)
+#define IGC_SRRCTL_TIMER0SEL(timer)(((timer) & 0x3) << 17)
+
+/* Sample RX tstamp in PHY sop */
+#define IGC_TSYNCRXCTL_RXSYNSIG 0x0400
+
+/* Sample TX tstamp in PHY sop */
+#define IGC_TSYNCTXCTL_TXSYNSIG 0x0020
+
 /* TSAUXC Configuration Bits */
 #define TSAUXC_EN_TT0  (1 << 0)  /* Enable target time 0. */
 #define TSAUXC_EN_TT1  (1 << 1)  /* Enable target time 1. */
-- 
2.9.5



[PATCH 3/3] net/igc: support IEEE 1588 PTP

2022-12-19 Thread Simei Su
Add igc support for the new ethdev APIs to enable/disable and
read/write/adjust IEEE 1588 PTP timestamps.

The example command for running ptpclient is as below:
./build/examples/dpdk-ptpclient -c 1 -n 3 -- -T 0 -p 0x1
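
Beyond ptpclient, an application can drive these new dev ops through
the generic ethdev calls. Below is a minimal sketch (standard
rte_ethdev API; the flags value 0 and the error handling are
illustrative only):

#include <stdio.h>
#include <time.h>
#include <rte_ethdev.h>

/* Minimal sketch: enable IEEE 1588 timesync on a started port and
 * read the latched RX timestamp of a received PTP packet. */
static int
ptp_rx_timestamp_demo(uint16_t port_id)
{
	struct timespec ts;
	int ret;

	ret = rte_eth_timesync_enable(port_id);
	if (ret != 0)
		return ret;

	/* Call after receiving an mbuf flagged with
	 * RTE_MBUF_F_RX_IEEE1588_TMST; 'flags' (0 here) selects the
	 * timesync queue on drivers that need it. */
	ret = rte_eth_timesync_read_rx_timestamp(port_id, &ts, 0);
	if (ret == 0)
		printf("RX timestamp: %ld.%09ld\n",
		       (long)ts.tv_sec, ts.tv_nsec);

	return ret;
}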

Signed-off-by: Simei Su 
---
 drivers/net/igc/igc_ethdev.c | 222 +++
 drivers/net/igc/igc_ethdev.h |   4 +-
 drivers/net/igc/igc_txrx.c   |  50 +-
 drivers/net/igc/igc_txrx.h   |   1 +
 4 files changed, 272 insertions(+), 5 deletions(-)

diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c
index dcd262f..ef3346b 100644
--- a/drivers/net/igc/igc_ethdev.c
+++ b/drivers/net/igc/igc_ethdev.c
@@ -78,6 +78,16 @@
 #define IGC_ALARM_INTERVAL 800u
 /* us, about 13.6s some per-queue registers will wrap around back to 0. */
 
+/* Transmit and receive latency (for PTP timestamps) */
+#define IGC_I225_TX_LATENCY_10 240
+#define IGC_I225_TX_LATENCY_10058
+#define IGC_I225_TX_LATENCY_1000   80
+#define IGC_I225_TX_LATENCY_2500   1325
+#define IGC_I225_RX_LATENCY_10 6450
+#define IGC_I225_RX_LATENCY_100185
+#define IGC_I225_RX_LATENCY_1000   300
+#define IGC_I225_RX_LATENCY_2500   1485
+
 static const struct rte_eth_desc_lim rx_desc_lim = {
.nb_max = IGC_MAX_RXD,
.nb_min = IGC_MIN_RXD,
@@ -245,6 +255,18 @@ eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
 static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask);
 static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
  enum rte_vlan_type vlan_type, uint16_t tpid);
+static int eth_igc_timesync_enable(struct rte_eth_dev *dev);
+static int eth_igc_timesync_disable(struct rte_eth_dev *dev);
+static int eth_igc_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
+ struct timespec *timestamp,
+ uint32_t flags);
+static int eth_igc_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
+ struct timespec *timestamp);
+static int eth_igc_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta);
+static int eth_igc_timesync_read_time(struct rte_eth_dev *dev,
+ struct timespec *timestamp);
+static int eth_igc_timesync_write_time(struct rte_eth_dev *dev,
+  const struct timespec *timestamp);
 
 static const struct eth_dev_ops eth_igc_ops = {
.dev_configure  = eth_igc_configure,
@@ -298,6 +320,13 @@ static const struct eth_dev_ops eth_igc_ops = {
.vlan_tpid_set  = eth_igc_vlan_tpid_set,
.vlan_strip_queue_set   = eth_igc_vlan_strip_queue_set,
.flow_ops_get   = eth_igc_flow_ops_get,
+   .timesync_enable= eth_igc_timesync_enable,
+   .timesync_disable   = eth_igc_timesync_disable,
+   .timesync_read_rx_timestamp = eth_igc_timesync_read_rx_timestamp,
+   .timesync_read_tx_timestamp = eth_igc_timesync_read_tx_timestamp,
+   .timesync_adjust_time   = eth_igc_timesync_adjust_time,
+   .timesync_read_time = eth_igc_timesync_read_time,
+   .timesync_write_time= eth_igc_timesync_write_time,
 };
 
 /*
@@ -2582,6 +2611,199 @@ eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
 }
 
 static int
+eth_igc_timesync_enable(struct rte_eth_dev *dev)
+{
+   struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
+   struct timespec system_time;
+   struct igc_rx_queue *rxq;
+   uint32_t val;
+   uint16_t i;
+
+   IGC_WRITE_REG(hw, IGC_TSAUXC, 0x0);
+
+   clock_gettime(CLOCK_REALTIME, &system_time);
+   IGC_WRITE_REG(hw, IGC_SYSTIML, system_time.tv_nsec);
+   IGC_WRITE_REG(hw, IGC_SYSTIMH, system_time.tv_sec);
+
+   /* Enable timestamping of received PTP packets. */
+   val = IGC_READ_REG(hw, IGC_RXPBS);
+   val |= IGC_RXPBS_CFG_TS_EN;
+   IGC_WRITE_REG(hw, IGC_RXPBS, val);
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   val = IGC_READ_REG(hw, IGC_SRRCTL(i));
+   /* For now, only support retrieving Rx timestamp from timer0. */
+   val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) |
+  IGC_SRRCTL_TIMESTAMP;
+   IGC_WRITE_REG(hw, IGC_SRRCTL(i), val);
+   }
+
+   val = IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_ALL |
+ IGC_TSYNCRXCTL_RXSYNSIG;
+   IGC_WRITE_REG(hw, IGC_TSYNCRXCTL, val);
+
+   /* Enable Timestamping of transmitted PTP packets. */
+   IGC_WRITE_REG(hw, IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED |
+ IGC_TSYNCTXCTL_TXSYNSIG);
+
+   /* Read TXSTMP registers to discard any timestamp previously stored. */
+   IGC_READ_REG(hw, IGC_TXSTMPL);
+   IGC_READ_REG(hw, IGC_TXSTMPH);
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   rxq = dev->data->rx_queues[i];
+   rxq->offlo

[PATCH 0/2] net/igc: support Tx timestamp offload

2022-12-19 Thread Simei Su
[PATCH 1/2] add related definitions to support Tx timestamp offload.
[PATCH 2/2] enable Tx timestamp offload by "Launch Time".

Simei Su (2):
  net/igc/base: support Tx timestamp offload
  net/igc: enable Tx timestamp offload

 drivers/net/igc/base/igc_defines.h |  9 +
 drivers/net/igc/base/igc_regs.h|  8 +
 drivers/net/igc/igc_ethdev.c   | 70 ++
 drivers/net/igc/igc_ethdev.h   |  6 +++-
 drivers/net/igc/igc_txrx.c | 58 ++-
 drivers/net/igc/igc_txrx.h |  3 ++
 6 files changed, 145 insertions(+), 9 deletions(-)

-- 
2.9.5



[PATCH 1/2] net/igc/base: support Tx timestamp offload

2022-12-19 Thread Simei Su
Add definitions for Tx timestamp offload
"RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP".

Signed-off-by: Simei Su 
---
 drivers/net/igc/base/igc_defines.h | 9 +
 drivers/net/igc/base/igc_regs.h| 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/net/igc/base/igc_defines.h b/drivers/net/igc/base/igc_defines.h
index dd7330a..280570b 100644
--- a/drivers/net/igc/base/igc_defines.h
+++ b/drivers/net/igc/base/igc_defines.h
@@ -188,6 +188,15 @@
 #define IGC_RCTL_BSEX  0x0200 /* Buffer size extension */
 #define IGC_RCTL_SECRC 0x0400 /* Strip Ethernet CRC */
 
+#define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */
+#define IGC_TXPBSIZE_TSN   0x04145145 /* 5k bytes buffer for each queue */
+
+/* Transmit Scheduling */
+#define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x0001
+#define IGC_TQAVCTRL_ENHANCED_QAV  0x0008
+
+#define IGC_TXQCTL_QUEUE_MODE_LAUNCHT  0x0001
+
 /* Use byte values for the following shift parameters
  * Usage:
  * psrctl |= (((ROUNDUP(value0, 128) >> IGC_PSRCTL_BSIZE0_SHIFT) &
diff --git a/drivers/net/igc/base/igc_regs.h b/drivers/net/igc/base/igc_regs.h
index d424387..e423814 100644
--- a/drivers/net/igc/base/igc_regs.h
+++ b/drivers/net/igc/base/igc_regs.h
@@ -602,6 +602,14 @@
 #define IGC_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */
 #define IGC_RXUDP  0x0B638 /* Time Sync Rx UDP Port - RW */
 
+#define IGC_QBVCYCLET  0x331C
+#define IGC_QBVCYCLET_S 0x3320
+#define IGC_STQT(_n)   (0x3324 + 0x4 * (_n))
+#define IGC_ENDQT(_n)  (0x3334 + 0x4 * (_n))
+#define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n))
+#define IGC_BASET_L0x3314
+#define IGC_BASET_H0x3318
+
 /* Filtering Registers */
 #define IGC_SAQF(_n)   (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */
 #define IGC_DAQF(_n)   (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */
-- 
2.9.5



[PATCH 2/2] net/igc: enable Tx timestamp offload

2022-12-19 Thread Simei Su
This patch supports Tx timestamp offload by leveraging the NIC's
"Launch Time" feature for "RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP".

Signed-off-by: Simei Su 
---
 drivers/net/igc/igc_ethdev.c | 70 
 drivers/net/igc/igc_ethdev.h |  6 +++-
 drivers/net/igc/igc_txrx.c   | 58 +++-
 drivers/net/igc/igc_txrx.h   |  3 ++
 4 files changed, 128 insertions(+), 9 deletions(-)

diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c
index ef3346b..28f6cd5 100644
--- a/drivers/net/igc/igc_ethdev.c
+++ b/drivers/net/igc/igc_ethdev.c
@@ -88,6 +88,9 @@
 #define IGC_I225_RX_LATENCY_1000   300
 #define IGC_I225_RX_LATENCY_2500   1485
 
+uint64_t igc_timestamp_dynflag;
+int igc_timestamp_dynfield_offset = -1;
+
 static const struct rte_eth_desc_lim rx_desc_lim = {
.nb_max = IGC_MAX_RXD,
.nb_min = IGC_MIN_RXD,
@@ -267,6 +270,7 @@ static int eth_igc_timesync_read_time(struct rte_eth_dev *dev,
  struct timespec *timestamp);
 static int eth_igc_timesync_write_time(struct rte_eth_dev *dev,
   const struct timespec *timestamp);
+static int eth_igc_read_clock(struct rte_eth_dev *dev, uint64_t *clock);
 
 static const struct eth_dev_ops eth_igc_ops = {
.dev_configure  = eth_igc_configure,
@@ -327,6 +331,7 @@ static const struct eth_dev_ops eth_igc_ops = {
.timesync_adjust_time   = eth_igc_timesync_adjust_time,
.timesync_read_time = eth_igc_timesync_read_time,
.timesync_write_time= eth_igc_timesync_write_time,
+   .read_clock = eth_igc_read_clock,
 };
 
 /*
@@ -949,7 +954,12 @@ eth_igc_start(struct rte_eth_dev *dev)
struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
+   uint32_t nsec, sec, baset_l, baset_h, tqavctrl;
+   struct timespec system_time;
+   int64_t n, systime;
+   uint32_t txqctl = 0;
uint32_t *speeds;
+   uint16_t i;
int ret;
 
PMD_INIT_FUNC_TRACE();
@@ -1009,6 +1019,55 @@ eth_igc_start(struct rte_eth_dev *dev)
return ret;
}
 
+   if (igc_timestamp_dynflag > 0) {
+   adapter->base_time = 0;
+   adapter->cycle_time = NSEC_PER_SEC;
+
+   IGC_WRITE_REG(hw, IGC_TSSDP, 0);
+   IGC_WRITE_REG(hw, IGC_TSIM, TSINTR_TXTS);
+   IGC_WRITE_REG(hw, IGC_IMS, IGC_ICR_TS);
+
+   IGC_WRITE_REG(hw, IGC_TSAUXC, 0);
+   IGC_WRITE_REG(hw, IGC_I350_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
+   IGC_WRITE_REG(hw, IGC_TXPBS, IGC_TXPBSIZE_TSN);
+
+   tqavctrl = IGC_READ_REG(hw, IGC_I210_TQAVCTRL);
+   tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
+   IGC_TQAVCTRL_ENHANCED_QAV;
+   IGC_WRITE_REG(hw, IGC_I210_TQAVCTRL, tqavctrl);
+
+   IGC_WRITE_REG(hw, IGC_QBVCYCLET_S, adapter->cycle_time);
+   IGC_WRITE_REG(hw, IGC_QBVCYCLET, adapter->cycle_time);
+
+   for (i = 0; i < dev->data->nb_tx_queues; i++) {
+   IGC_WRITE_REG(hw, IGC_STQT(i), 0);
+   IGC_WRITE_REG(hw, IGC_ENDQT(i), NSEC_PER_SEC);
+
+   txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+   IGC_WRITE_REG(hw, IGC_TXQCTL(i), txqctl);
+   }
+
+   clock_gettime(CLOCK_REALTIME, &system_time);
+   IGC_WRITE_REG(hw, IGC_SYSTIML, system_time.tv_nsec);
+   IGC_WRITE_REG(hw, IGC_SYSTIMH, system_time.tv_sec);
+
+   nsec = IGC_READ_REG(hw, IGC_SYSTIML);
+   sec = IGC_READ_REG(hw, IGC_SYSTIMH);
+   systime = (int64_t)sec * NSEC_PER_SEC + (int64_t)nsec;
+
+   if (systime > adapter->base_time) {
+   n = (systime - adapter->base_time) /
+   adapter->cycle_time;
+   adapter->base_time = adapter->base_time +
+   (n + 1) * adapter->cycle_time;
+   }
+
+   baset_h = adapter->base_time / NSEC_PER_SEC;
+   baset_l = adapter->base_time % NSEC_PER_SEC;
+   IGC_WRITE_REG(hw, IGC_BASET_H, baset_h);
+   IGC_WRITE_REG(hw, IGC_BASET_L, baset_l);
+   }
+
igc_clear_hw_cntrs_base_generic(hw);
 
/* VLAN Offload Settings */
@@ -2804,6 +2863,17 @@ eth_igc_timesync_disable(struct rte_eth_dev *dev)
 }
 
 static int
+eth_igc_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
+{
+   struct timespec system_time;
+
+   clock_gettime(CLOCK_REALTIME, &system_time);
+   *clock = system_time.tv_sec * NSEC_PER_SEC + system_time.tv_nsec;
+
+   return 0;
+}
+
+static int
 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pc

[PATCH] app/crypto-perf: fix number of segments

2022-12-19 Thread Anoob Joseph
When a segment size is provided, the total number of segments is
calculated from it. Reductions of the usable segment size due to
headroom/tailroom need to be accounted for when determining the total
number of segments required.
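
For example, with segment_sz=2048, headroom_sz=128 and tailroom_sz=64,
each segment carries only 2048 - 128 - 64 = 1856 data bytes; a
max_size of 4096 bytes (buffer plus digest) then needs
ceil(4096 / 1856) = 3 segments, whereas the old segment_sz-based math
reserved room for only ceil(4096 / 2048) = 2, undersizing the mempool
objects. (Hypothetical numbers, for illustration only.)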

Fixes: c1670ae0022b ("app/crypto-perf: honour min headroom/tailroom")

Signed-off-by: Anoob Joseph 
Signed-off-by: Akhil Goyal 
---
 app/test-crypto-perf/cperf_test_common.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 00aadc9a47..cc03b33ac5 100644
--- a/app/test-crypto-perf/cperf_test_common.c
+++ b/app/test-crypto-perf/cperf_test_common.c
@@ -198,9 +198,11 @@ cperf_alloc_common_memory(const struct cperf_options *options,
RTE_CACHE_LINE_ROUNDUP(crypto_op_total_size);
uint32_t mbuf_size = sizeof(struct rte_mbuf) + options->segment_sz;
uint32_t max_size = options->max_buffer_size + options->digest_sz;
-   uint16_t segments_nb = (max_size % options->segment_sz) ?
-   (max_size / options->segment_sz) + 1 :
-   max_size / options->segment_sz;
+   uint32_t segment_data_len = options->segment_sz - options->headroom_sz -
+   options->tailroom_sz;
+   uint16_t segments_nb = (max_size % segment_data_len) ?
+   (max_size / segment_data_len) + 1 :
+   (max_size / segment_data_len);
uint32_t obj_size = crypto_op_total_size_padded +
(mbuf_size * segments_nb);
 
-- 
2.25.1



RE: [PATCH] net/iavf:fix slow memory allocation

2022-12-19 Thread You, KaisenX


> -Original Message-
> From: Ferruh Yigit 
> Sent: 2022年12月13日 21:28
> To: You, KaisenX ; dev@dpdk.org; Burakov,
> Anatoly ; David Marchand
> 
> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX
> ; Wu, Jingjing ; Xing,
> Beilei ; Zhang, Qi Z ; Luca
> Boccassi ; Mcnamara, John
> ; Kevin Traynor 
> Subject: Re: [PATCH] net/iavf:fix slow memory allocation
> 
> On 12/13/2022 9:35 AM, Ferruh Yigit wrote:
> > On 12/13/2022 7:52 AM, You, KaisenX wrote:
> >>
> >>
> >>> -Original Message-
> >>> From: Ferruh Yigit 
> >>> Sent: 2022年12月8日 23:04
> >>> To: You, KaisenX ; dev@dpdk.org; Burakov,
> >>> Anatoly ; David Marchand
> >>> 
> >>> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou,
> >>> YidingX ; Wu, Jingjing
> >>> ; Xing, Beilei ;
> >>> Zhang, Qi Z ; Luca Boccassi
> >>> ; Mcnamara, John ;
> Kevin
> >>> Traynor 
> >>> Subject: Re: [PATCH] net/iavf:fix slow memory allocation
> >>>
> >>> On 11/17/2022 6:57 AM, Kaisen You wrote:
>  In some cases, the DPDK does not allocate hugepage heap memory to
> >>> some
>  sockets due to the user setting parameters (e.g. -l 40-79, SOCKET 0
>  has no memory).
>  When the interrupt thread runs on the corresponding core of this
>  socket, each allocation/release will execute a whole set of heap
>  allocation/release operations,resulting in poor performance.
>  Instead we call malloc() to get memory from the system's heap space
>  to fix this problem.
> 
> >>>
> >>> Hi Kaisen,
> >>>
> >>> Using libc malloc can improve performance for this case, but I would
> >>> like to understand root cause of the problem.
> >>>
> >>>
> >>> As far as I can see, interrupt callbacks are run by interrupt thread
> >>> ("eal-intr- thread"), and interrupt thread created by
> 'rte_ctrl_thread_create()' API.
> >>>
> >>> 'rte_ctrl_thread_create()' comment mentions that "CPU affinity
> >>> retrieved at the time 'rte_eal_init()' was called,"
> >>>
> >>> And 'rte_eal_init()' is run on main lcore, which is the first lcore
> >>> in the core list (unless otherwise defined with --main-lcore).
> >>>
> >>> So, the interrupts should be running on a core that has hugepages
> >>> allocated for it, am I missing something here?
> >>>
> >>>
> >> Thank for your comments.  Let me try to explain the root cause here:
> >> eal_intr_thread the CPU in the corresponding slot does not create
> memory pool.
> >> That results in frequent memory subsequently creating/destructing.
> >>
> >> When testpmd started, the parameter (e.g. -l 40-79) is set.
> >> Different OS has different topology. Some OS like SUSE only creates
> >> memory pool for one CPU slot, while other system creates for two.
> >> That is why the problem occurs when using memories in different OS.
> >
> >
> > It is testpmd application that decides from which socket to allocate
> > memory from, right. This is nothing specific to OS.
> >
> > As far as I remember, testpmd logic is too allocate from socket that
> > its cores are used (provided with -l parameter), and allocate from
> > socket that device is attached to.
> >
> > So, in a dual socket system, if all used cores are in socket 1 and the
> > NIC is in socket 1, no memory is allocated for socket 0. This is to
> > optimize memory consumption.
> >
> >
> > Can you please confirm that the problem you are observing is because
> > interrupt handler is running on a CPU, which doesn't have memory
> > allocated for its socket?
> >
> > In this case what I don't understand is why interrupts is not running
> > on main lcore, which should be first core in the list, for "-l 40-79"
> > sample it should be lcore 40.
> > For your case, is interrupt handler run on core 0? Or any arbitrary core?
> > If so, can you please confirm when you provide core list as "-l 0,40-79"
> > fixes the issue?
> >
First of all, sorry for the late reply.
I can confirm that the problem I observed occurs because the interrupt
handler runs on a CPU whose socket has no memory allocated.

In my case, the interrupt handler runs on core 0.
I tried providing "-l 0,40-79" as a startup parameter, and that
resolves this issue.

I would like to correct my previous statement that this problem only
occurs on SUSE systems. It occurs on any OS, as long as the core range
given in the startup parameters lies only on node 1.
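
For reference, the rte_malloc_socket() approach discussed in this
thread would look roughly like the sketch below (the port-to-socket
lookup is illustrative; both calls are existing DPDK APIs):

#include <rte_ethdev.h>
#include <rte_malloc.h>

/* Allocate from the NUMA node the device sits on, rather than the
 * node of the calling (interrupt) thread. */
static void *
alloc_on_dev_socket(uint16_t port_id, size_t size)
{
	int socket_id = rte_eth_dev_socket_id(port_id);

	/* -1 maps to SOCKET_ID_ANY, i.e. any node that has a heap. */
	return rte_malloc_socket(NULL, size, RTE_CACHE_LINE_SIZE,
				 socket_id);
}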

> >
> >>>
> >>>
> >>> And what about using 'rte_malloc_socket()' API (instead of
> >>> rte_malloc), which gets 'socket' as parameter, and provide the
> >>> socket that devices is on as parameter to this API? Is it possible to test
> this?
> >>>
> >>>
> >> As to the reason for not using rte_malloc_socket. I thought
> >> rte_malloc_socket() could solve the problem too. And the appropriate
> >> parameter should be the socket_id that created the memory pool for
> >> DPDK initialization. Assuming that> the socket_id of the initially
> >> allocated memory = 1, first let the
> > eal_intr_thread
> >> determine if it is on the socket_id, then record this socket_id in
> >> the eal_intr_thread and pass it

[PATCH v2 0/3] support match icmpv6 ID and sequence

2022-12-19 Thread Leo Xu
Currently, the rte_flow API does not support matching on the
ID and sequence fields of ICMPv6 echo packets.

This patchset adds support for matching the ICMPv6 ID and
sequence fields in rte_flow. It adds the needed rte_flow API
and provides the corresponding implementation for the mlx5 PMD.

Leo Xu (3):
  ethdev: add ICMPv6 ID and sequence
  net/mlx5: add ICMPv6 ID and sequence match support
  net/mlx5/hws: add ICMPv6 ID and sequence match support

---
v2:
* rebase 23.03

 app/test-pmd/cmdline_flow.c | 70 
 doc/guides/nics/mlx5.rst|  2 +-
 doc/guides/prog_guide/rte_flow.rst  | 14 
 doc/guides/rel_notes/release_23_03.rst  | 10 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 10 +++
 drivers/net/mlx5/hws/mlx5dr_definer.c   | 88 +
 drivers/net/mlx5/mlx5_flow.c| 61 ++
 drivers/net/mlx5/mlx5_flow.h|  4 +
 drivers/net/mlx5/mlx5_flow_dv.c | 76 ++
 drivers/net/mlx5/mlx5_flow_hw.c |  2 +
 lib/ethdev/rte_flow.c   |  4 +
 lib/ethdev/rte_flow.h   | 25 ++
 lib/net/meson.build |  1 +
 lib/net/rte_icmp6.h | 48 +++
 14 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 lib/net/rte_icmp6.h

-- 
2.27.0



[PATCH v2 1/3] ethdev: add ICMPv6 ID and sequence

2022-12-19 Thread Leo Xu
This patch adds API support for the ICMPv6 ID and sequence fields.
1: Add two new pattern item types for ICMPv6 echo request and reply:
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY

2: Add a new header file, rte_icmp6.h, for ICMPv6 packet definitions.
3: Enhance the testpmd flow pattern to support the ICMPv6 identifier
   and sequence fields.

  Example of ICMPv6 echo pattern in testpmd command:

  pattern eth / ipv6 / icmp6_echo_request / end
  pattern eth / ipv6 / icmp6_echo_reply / end
  pattern eth / ipv6 / icmp6_echo_request ident is 20 seq is 30 / end
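
Equivalently, an application can build the same match programmatically.
A minimal sketch using the new item type and structures from this patch
(flow attributes and actions omitted):

#include <string.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

/* Pattern: eth / ipv6 / icmp6_echo_request ident is 20 seq is 30 */
static void
build_icmp6_echo_pattern(struct rte_flow_item pattern[4],
			 struct rte_flow_item_icmp6_echo *spec,
			 struct rte_flow_item_icmp6_echo *mask)
{
	memset(pattern, 0, 4 * sizeof(*pattern));
	memset(spec, 0, sizeof(*spec));
	memset(mask, 0, sizeof(*mask));

	spec->echo.identifier = RTE_BE16(20);
	spec->echo.sequence = RTE_BE16(30);
	mask->echo.identifier = RTE_BE16(0xffff);
	mask->echo.sequence = RTE_BE16(0xffff);

	pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
	pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV6;
	pattern[2].type = RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST;
	pattern[2].spec = spec;
	pattern[2].mask = mask;
	pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
}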

Signed-off-by: Leo Xu 
Signed-off-by: Bing Zhao 
---
 app/test-pmd/cmdline_flow.c | 70 +
 doc/guides/prog_guide/rte_flow.rst  | 14 +
 doc/guides/rel_notes/release_23_03.rst  |  4 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 10 +++
 lib/ethdev/rte_flow.c   |  4 ++
 lib/ethdev/rte_flow.h   | 25 
 lib/net/meson.build |  1 +
 lib/net/rte_icmp6.h | 48 ++
 8 files changed, 176 insertions(+)
 create mode 100644 lib/net/rte_icmp6.h

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 88108498e0..7dc1528899 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -360,6 +360,12 @@ enum index {
ITEM_ICMP6,
ITEM_ICMP6_TYPE,
ITEM_ICMP6_CODE,
+   ITEM_ICMP6_ECHO_REQUEST,
+   ITEM_ICMP6_ECHO_REQUEST_ID,
+   ITEM_ICMP6_ECHO_REQUEST_SEQ,
+   ITEM_ICMP6_ECHO_REPLY,
+   ITEM_ICMP6_ECHO_REPLY_ID,
+   ITEM_ICMP6_ECHO_REPLY_SEQ,
ITEM_ICMP6_ND_NS,
ITEM_ICMP6_ND_NS_TARGET_ADDR,
ITEM_ICMP6_ND_NA,
@@ -1327,6 +1333,8 @@ static const enum index next_item[] = {
ITEM_IPV6_EXT,
ITEM_IPV6_FRAG_EXT,
ITEM_ICMP6,
+   ITEM_ICMP6_ECHO_REQUEST,
+   ITEM_ICMP6_ECHO_REPLY,
ITEM_ICMP6_ND_NS,
ITEM_ICMP6_ND_NA,
ITEM_ICMP6_ND_OPT,
@@ -1575,6 +1583,20 @@ static const enum index item_icmp6[] = {
ZERO,
 };
 
+static const enum index item_icmp6_echo_request[] = {
+   ITEM_ICMP6_ECHO_REQUEST_ID,
+   ITEM_ICMP6_ECHO_REQUEST_SEQ,
+   ITEM_NEXT,
+   ZERO,
+};
+
+static const enum index item_icmp6_echo_reply[] = {
+   ITEM_ICMP6_ECHO_REPLY_ID,
+   ITEM_ICMP6_ECHO_REPLY_SEQ,
+   ITEM_NEXT,
+   ZERO,
+};
+
 static const enum index item_icmp6_nd_ns[] = {
ITEM_ICMP6_ND_NS_TARGET_ADDR,
ITEM_NEXT,
@@ -4323,6 +4345,54 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6,
 code)),
},
+   [ITEM_ICMP6_ECHO_REQUEST] = {
+   .name = "icmp6_echo_request",
+   .help = "match ICMPv6 echo request",
+   .priv = PRIV_ITEM(ICMP6_ECHO_REQUEST,
+ sizeof(struct rte_flow_item_icmp6_echo)),
+   .next = NEXT(item_icmp6_echo_request),
+   .call = parse_vc,
+   },
+   [ITEM_ICMP6_ECHO_REQUEST_ID] = {
+   .name = "ident",
+   .help = "ICMPv6 echo request identifier",
+   .next = NEXT(item_icmp6_echo_request, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo,
+echo.identifier)),
+   },
+   [ITEM_ICMP6_ECHO_REQUEST_SEQ] = {
+   .name = "seq",
+   .help = "ICMPv6 echo request sequence",
+   .next = NEXT(item_icmp6_echo_request, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo,
+echo.sequence)),
+   },
+   [ITEM_ICMP6_ECHO_REPLY] = {
+   .name = "icmp6_echo_reply",
+   .help = "match ICMPv6 echo reply",
+   .priv = PRIV_ITEM(ICMP6_ECHO_REPLY,
+ sizeof(struct rte_flow_item_icmp6_echo)),
+   .next = NEXT(item_icmp6_echo_reply),
+   .call = parse_vc,
+   },
+   [ITEM_ICMP6_ECHO_REPLY_ID] = {
+   .name = "ident",
+   .help = "ICMPv6 echo reply identifier",
+   .next = NEXT(item_icmp6_echo_reply, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo,
+echo.identifier)),
+   },
+   [ITEM_ICMP6_ECHO_REPLY_SEQ] = {
+   .name = "seq",
+   .help = "ICMPv6 echo reply sequence",
+   .next = NEXT(item_icmp6_echo_reply, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo

[PATCH v2 2/3] net/mlx5: add ICMPv6 ID and sequence match support

2022-12-19 Thread Leo Xu
This patch adds ICMPv6 ID and sequence match support.
Since the type and code of an ICMPv6 echo packet are already implied
by the item types:
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY
the mlx5 PMD sets the appropriate type and code automatically:
  Echo request: type(128), code(0)
  Echo reply:   type(129), code(0)
The type and code provided by the application are ignored.

Signed-off-by: Leo Xu 
---
 doc/guides/nics/mlx5.rst   |  2 +-
 doc/guides/rel_notes/release_23_03.rst |  6 ++
 drivers/net/mlx5/mlx5_flow.c   | 61 +
 drivers/net/mlx5/mlx5_flow.h   |  4 ++
 drivers/net/mlx5/mlx5_flow_dv.c| 76 ++
 drivers/net/mlx5/mlx5_flow_hw.c|  2 +
 6 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 51f51259e3..78693d19b0 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -405,7 +405,7 @@ Limitations
   - The input buffer, providing the removal size, is not validated.
   - The buffer size must match the length of the headers to be removed.
 
-- ICMP(code/type/identifier/sequence number) / ICMP6(code/type) matching, IP-in-IP and MPLS flow matching are all
+- ICMP(code/type/identifier/sequence number) / ICMP6(code/type/identifier/sequence number) matching, IP-in-IP and MPLS flow matching are all
   mutually exclusive features which cannot be supported together
   (see :ref:`mlx5_firmware_config`).
 
diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst
index 5af9c43dd9..011c2489f7 100644
--- a/doc/guides/rel_notes/release_23_03.rst
+++ b/doc/guides/rel_notes/release_23_03.rst
@@ -60,6 +60,12 @@ New Features
   Added ``icmp6_echo`` item in rte_flow to support ID and sequence
   matching in ICMPv6 echo request/reply packets.
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for matching on ICMPv6 ID and sequence fields.
+
 Removed Items
 -
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a0cf677fb0..8eea78251e 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2352,6 +2352,67 @@ mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
return 0;
 }
 
+/**
+ * Validate ICMP6 echo request/reply item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] ext_vlan_sup
+ *   Whether extended VLAN features are supported or not.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_item_icmp6_echo(const struct rte_flow_item *item,
+  uint64_t item_flags,
+  uint8_t target_protocol,
+  struct rte_flow_error *error)
+{
+   const struct rte_flow_item_icmp6_echo *mask = item->mask;
+   const struct rte_flow_item_icmp6_echo nic_mask = {
+   .echo.hdr.type = 0xff,
+   .echo.hdr.code = 0xff,
+   .echo.identifier = RTE_BE16(0x),
+   .echo.sequence = RTE_BE16(0x),
+   };
+   const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+   const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+   const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+ MLX5_FLOW_LAYER_OUTER_L4;
+   int ret;
+
+   if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
+   return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "protocol filtering not compatible"
+ " with ICMP6 layer");
+   if (!(item_flags & l3m))
+   return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "IPv6 is mandatory to filter on"
+ " ICMP6");
+   if (item_flags & l4m)
+   return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "multiple L4 layers not supported");
+   if (!mask)
+   mask = &nic_mask;
+   ret = mlx5_flow_item_acceptable
+   (item, (const uint8_t *)mask,
+(const uint8_t *)&nic_mask,
+sizeof(struct rte_flow_item_icmp6_echo),
+MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
+   if (ret < 0)
+   return ret;
+   return 0;
+}
+
 /**
  * 

[PATCH v2 3/3] net/mlx5/hws: add ICMPv6 ID and sequence match support

2022-12-19 Thread Leo Xu
This patch adds ICMPv6 ID and sequence match support for HWS.
Since the type and code of an ICMPv6 echo packet are already implied
by the item types:
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST
  RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY
the mlx5 PMD sets the appropriate type and code automatically:
  Echo request: type(128), code(0)
  Echo reply:   type(129), code(0)
The type and code provided by the application are ignored.

This patch also fixes the following issues in the ICMP definer:
1. Parsing an inner ICMP item got and overwrote the outer IP_PROTOCOL
field entry, which incorrectly removed the outer L4 match. Fix this by
getting the correct inner entry.
2. The member order of mlx5_ifc_header_icmp_bits didn't follow the
ICMP header format. Reorder the members to make it more consistent.

Signed-off-by: Leo Xu 
---
 drivers/net/mlx5/hws/mlx5dr_definer.c | 88 +++
 1 file changed, 88 insertions(+)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c
index 6b98eb8c96..8fbc40ff15 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -368,6 +368,47 @@ mlx5dr_definer_icmp6_dw1_set(struct mlx5dr_definer_fc *fc,
DR_SET(tag, icmp_dw1, fc->byte_off, fc->bit_off, fc->bit_mask);
 }
 
+static void
+mlx5dr_definer_icmp6_echo_dw1_mask_set(struct mlx5dr_definer_fc *fc,
+  __rte_unused const void *item_spec,
+  uint8_t *tag)
+{
+   const struct rte_flow_item_icmp6 spec = {0xFF, 0xFF, 0x0};
+   mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag);
+}
+
+static void
+mlx5dr_definer_icmp6_echo_request_dw1_set(struct mlx5dr_definer_fc *fc,
+ __rte_unused const void *item_spec,
+ uint8_t *tag)
+{
+   const struct rte_flow_item_icmp6 spec = {RTE_ICMP6_ECHO_REQUEST, 0, 0};
+   mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag);
+}
+
+static void
+mlx5dr_definer_icmp6_echo_reply_dw1_set(struct mlx5dr_definer_fc *fc,
+   __rte_unused const void *item_spec,
+   uint8_t *tag)
+{
+   const struct rte_flow_item_icmp6 spec = {RTE_ICMP6_ECHO_REPLY, 0, 0};
+   mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag);
+}
+
+static void
+mlx5dr_definer_icmp6_echo_dw2_set(struct mlx5dr_definer_fc *fc,
+ const void *item_spec,
+ uint8_t *tag)
+{
+   const struct rte_flow_item_icmp6_echo *v = item_spec;
+   rte_be32_t dw2;
+
+   dw2 = (rte_be_to_cpu_16(v->echo.identifier) << __mlx5_dw_bit_off(header_icmp, ident)) |
+ (rte_be_to_cpu_16(v->echo.sequence) << __mlx5_dw_bit_off(header_icmp, seq_nb));
+
+   DR_SET(tag, dw2, fc->byte_off, fc->bit_off, fc->bit_mask);
+}
+
 static void
 mlx5dr_definer_ipv6_flow_label_set(struct mlx5dr_definer_fc *fc,
   const void *item_spec,
@@ -1441,6 +1482,48 @@ mlx5dr_definer_conv_item_icmp6(struct mlx5dr_definer_conv_data *cd,
return 0;
 }
 
+static int
+mlx5dr_definer_conv_item_icmp6_echo(struct mlx5dr_definer_conv_data *cd,
+   struct rte_flow_item *item,
+   int item_idx)
+{
+   const struct rte_flow_item_icmp6_echo *m = item->mask;
+   struct mlx5dr_definer_fc *fc;
+   bool inner = cd->tunnel;
+
+   if (!cd->relaxed) {
+   /* Overwrite match on L4 type ICMP6 */
+   fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, inner)];
+   fc->item_idx = item_idx;
+   fc->tag_set = &mlx5dr_definer_icmp_protocol_set;
+   fc->tag_mask_set = &mlx5dr_definer_ones_set;
+   DR_CALC_SET(fc, eth_l2, l4_type, inner);
+
+   /* Set fixed type and code for icmp6 echo request/reply */
+   fc = &cd->fc[MLX5DR_DEFINER_FNAME_ICMP_DW1];
+   fc->item_idx = item_idx;
+   fc->tag_mask_set = &mlx5dr_definer_icmp6_echo_dw1_mask_set;
+   if (item->type == RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST)
+   fc->tag_set = &mlx5dr_definer_icmp6_echo_request_dw1_set;
+   else /* RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY */
+   fc->tag_set = &mlx5dr_definer_icmp6_echo_reply_dw1_set;
+   DR_CALC_SET_HDR(fc, tcp_icmp, icmp_dw1);
+   }
+
+   if (!m)
+   return 0;
+
+   /* Set identifier & sequence into icmp_dw2 */
+   if (m->echo.identifier || m->echo.sequence) {
+   fc = &cd->fc[MLX5DR_DEFINER_FNAME_ICMP_DW2];
+   fc->item_idx = item_idx;
+   fc->tag_set = &mlx5dr_definer_icmp6_echo_dw2_set;
+   DR_CALC_SET_HDR(fc, tcp_icmp, icmp_dw2);
+   }
+
+   return 0;
+}
+
 static int
 mlx5dr_definer_conv_item_meter_color(struct mlx5dr_definer_conv_data *cd,
 struct rte_flow_item *item,
@@ -1577,6 +1660,11