[PATCH V1 00/11] Support resource sharing among ibv_devices
Add the option to use resources (tables, matchers, actions, etc.) from one gvmi (AKA ibv_context) on other gvmis. When a specific gvmi allows others to use its resources, the steering objects will be created and used on that gvmi. This is done through alias objects that map local resources to shared/remote resources, in order to allow sharing between several ports.

Erez Shitrit (6):
  mailmap: add new contributors to the list
  net/mlx5/hws: add capabilities fields for vhca access
  net/mlx5/hws: add vhca identifier ID to the caps
  net/mlx5/hws: support shared ibv-context with local one
  net/mlx5/hws: support actions while shared resources is used
  net/mlx5/hws: add debug details for cross gvmi

Yevgeny Kliteynik (5):
  net/mlx5/hws: remove wrong PRM capability macros
  net/mlx5/hws: add PRM definitions for cross-vhca capabilities
  net/mlx5/hws: read cross-vhca capabilities
  net/mlx5/hws: added allow-other-vhca-access command
  net/mlx5/hws: added command to create alias objects

 .mailmap | 1 +
 drivers/common/mlx5/mlx5_prm.h | 83 --
 drivers/net/mlx5/hws/mlx5dr.h | 7 +-
 drivers/net/mlx5/hws/mlx5dr_action.c | 69 +++-
 drivers/net/mlx5/hws/mlx5dr_action.h | 3 +
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 118 -
 drivers/net/mlx5/hws/mlx5dr_cmd.h | 25 +++
 drivers/net/mlx5/hws/mlx5dr_context.c | 34 +++-
 drivers/net/mlx5/hws/mlx5dr_context.h | 22 +++
 drivers/net/mlx5/hws/mlx5dr_debug.c | 24 ++-
 drivers/net/mlx5/hws/mlx5dr_internal.h | 1 +
 drivers/net/mlx5/hws/mlx5dr_matcher.c | 218 +++--
 drivers/net/mlx5/hws/mlx5dr_matcher.h | 9 +
 drivers/net/mlx5/hws/mlx5dr_table.c | 191 +-
 drivers/net/mlx5/hws/mlx5dr_table.h | 4 +-
 drivers/net/mlx5/mlx5_devx.c | 2 +-
 16 files changed, 750 insertions(+), 61 deletions(-)

-- 
2.18.2
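As an illustration (not part of the series), a minimal sketch of how an application could open a context on top of a shared ibv_context once these patches are applied. The shared_ibv_ctx attribute is introduced in patch 09; mlx5dr_context_open() and the queue attribute names are assumed from the mlx5dr.h API, and the sizing values are placeholders:

/* Sketch only: open a local mlx5dr context whose objects (except FT/TIR)
 * are allocated on a shared ibv_context and mapped back via alias objects.
 */
#include <infiniband/verbs.h>
#include "mlx5dr.h"

static struct mlx5dr_context *
open_shared_ctx(struct ibv_context *local, struct ibv_context *shared)
{
	struct mlx5dr_context_attr attr = {0};

	attr.queues = 1;	/* illustrative queue sizing */
	attr.queue_size = 256;
	attr.shared_ibv_ctx = shared;	/* attribute added by this series */

	return mlx5dr_context_open(local, &attr);
}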
[PATCH V1 01/11] mailmap: add new contributors to the list
Added Yevgeny Kliteynik to the list Signed-off-by: Erez Shitrit --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 75884b6fe2..c6f34d18a6 100644 --- a/.mailmap +++ b/.mailmap @@ -1495,6 +1495,7 @@ Yash Sharma Yasufumi Ogawa Yelena Krivosheev Yerden Zhumabekov +Yevgeny Kliteynik Yicai Lu Yiding Zhou Yi Li -- 2.18.2
[PATCH V1 02/11] net/mlx5/hws: add capabilities fields for vhca access
The new fields define the ability of one vhca to access another.

Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker
---
 drivers/common/mlx5/mlx5_prm.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 2b5c43ee6e..19c00ad913 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -2115,8 +2115,10 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 log_conn_track_max_alloc[0x5]; u8 reserved_at_d8[0x3]; u8 log_max_conn_track_offload[0x5]; - u8 reserved_at_e0[0x20]; /* End of DW7. */ - u8 reserved_at_100[0x60]; + u8 cross_vhca_object_to_object_supported[0x20]; /* End of DW7. */ + u8 allowed_object_for_other_vhca_access_high[0x20]; + u8 allowed_object_for_other_vhca_access[0x20]; + u8 reserved_at_140[0x20]; u8 reserved_at_160[0x3]; u8 hairpin_sq_wqe_bb_size[0x5]; u8 hairpin_sq_wq_in_host_mem[0x1]; -- 2.18.2
[PATCH V1 03/11] net/mlx5/hws: remove wrong PRM capability macros
From: Yevgeny Kliteynik Removing macros for wrong capability checks. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker --- drivers/common/mlx5/mlx5_prm.h | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 19c00ad913..3d9d69d9cf 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1332,12 +1332,6 @@ enum { (1ULL << MLX5_GENERAL_OBJ_TYPE_GENEVE_TLV_OPT) #define MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD \ (1ULL << MLX5_GENERAL_OBJ_TYPE_CONN_TRACK_OFFLOAD) -#define MLX5_GENERAL_OBJ_TYPES_CAP_RTC \ - (1ULL << MLX5_GENERAL_OBJ_TYPE_RTC) -#define MLX5_GENERAL_OBJ_TYPES_CAP_STC \ - (1ULL << MLX5_GENERAL_OBJ_TYPE_STC) -#define MLX5_GENERAL_OBJ_TYPES_CAP_STE \ - (1ULL << MLX5_GENERAL_OBJ_TYPE_STE) #define MLX5_GENERAL_OBJ_TYPES_CAP_DEFINER \ (1ULL << MLX5_GENERAL_OBJ_TYPE_DEFINER) #define MLX5_GENERAL_OBJ_TYPES_CAP_DEK \ -- 2.18.2
[PATCH V1 05/11] net/mlx5/hws: read cross-vhca capabilities
From: Yevgeny Kliteynik And keep them for future processing. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker --- drivers/net/mlx5/hws/mlx5dr_cmd.c | 24 drivers/net/mlx5/hws/mlx5dr_cmd.h | 1 + 2 files changed, 25 insertions(+) diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c index 721376b8da..2156fd6643 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.c +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c @@ -723,6 +723,7 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx, uint32_t in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0}; const struct flow_hw_port_info *port_info; struct ibv_device_attr_ex attr_ex; + u32 res; int ret; MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); @@ -798,6 +799,23 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx, capability.cmd_hca_cap_2. format_select_dw_gtpu_first_ext_dw_0); + /* check cross-VHCA support in cap2 */ + res = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.cross_vhca_object_to_object_supported); + + caps->cross_vhca_resources = (res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_TIR) && +(res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_FT) && +(res & MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_RTC); + + res = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.allowed_object_for_other_vhca_access); + + caps->cross_vhca_resources &= (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_TIR) && + (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_FT) && + (res & MLX5_CROSS_VHCA_ALLOWED_OBJS_RTC); + MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE | MLX5_HCA_CAP_OPMOD_GET_CUR); @@ -817,6 +835,12 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx, capability.flow_table_nic_cap. flow_table_properties_nic_receive.reparse); + /* check cross-VHCA support in flow table properties */ + res = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.cross_vhca_object); + caps->cross_vhca_resources &= res; + if (caps->wqe_based_update) { MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_GET_HCA_CAP_OP_MOD_WQE_BASED_FLOW_TABLE | diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h index 2b3b47f473..ab61e27fd8 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.h +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h @@ -163,6 +163,7 @@ struct mlx5dr_cmd_query_caps { uint8_t sq_ts_format; uint64_t definer_format_sup; uint32_t trivial_match_definer; + bool cross_vhca_resources; char fw_ver[64]; }; -- 2.18.2
[PATCH V1 04/11] net/mlx5/hws: add PRM definitions for cross-vhca capabilities
From: Yevgeny Kliteynik

Define the PRM bits for each new cross-vhca capability.

Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker
---
 drivers/common/mlx5/mlx5_prm.h | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 3d9d69d9cf..dfa25c2b49 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1930,7 +1930,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 metadata_reg_a_width[0x8]; u8 reserved_at_60[0xa]; u8 reparse[0x1]; - u8 reserved_at_6b[0xd]; + u8 reserved_at_6b[0x1]; + u8 cross_vhca_object[0x1]; + u8 reserved_at_6d[0xb]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; u8 log_max_flow_counter[0x8]; @@ -2084,6 +2086,19 @@ struct mlx5_ifc_flow_table_esw_cap_bits { u8 reserved_at_C00[0x7400]; }; +enum mlx5_ifc_cross_vhca_object_to_object_supported_types { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_TIR = 1 << 10, + MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_STC_TO_FT = 1 << 11, + MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_FT = 1 << 12, + MLX5_CROSS_VHCA_OBJ_TO_OBJ_TYPE_FT_TO_RTC = 1 << 13, +}; + +enum mlx5_ifc_cross_vhca_allowed_objects_types { + MLX5_CROSS_VHCA_ALLOWED_OBJS_TIR = 1 << 0x8, + MLX5_CROSS_VHCA_ALLOWED_OBJS_FT = 1 << 0x9, + MLX5_CROSS_VHCA_ALLOWED_OBJS_RTC = 1 << 0xa, +}; + /* * HCA Capabilities 2 */ @@ -3108,6 +3123,8 @@ enum { MLX5_GENERAL_OBJ_TYPE_RTC = 0x0041, MLX5_GENERAL_OBJ_TYPE_STE = 0x0042, MLX5_GENERAL_OBJ_TYPE_MODIFY_HEADER_PATTERN = 0x0043, + MLX5_GENERAL_OBJ_TYPE_FT_ALIAS = 0xff15, + MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS = 0xff16, }; struct mlx5_ifc_general_obj_in_cmd_hdr_bits { -- 2.18.2
[PATCH V1 06/11] net/mlx5/hws: added allow-other-vhca-access command
From: Yevgeny Kliteynik Added FW command to allow creation of alias objects. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker --- drivers/common/mlx5/mlx5_prm.h| 23 +++ drivers/net/mlx5/hws/mlx5dr_cmd.c | 28 drivers/net/mlx5/hws/mlx5dr_cmd.h | 9 + 3 files changed, 60 insertions(+) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index dfa25c2b49..9d36645949 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1133,6 +1133,7 @@ enum { MLX5_CMD_SET_REGEX_REGISTERS = 0xb06, MLX5_CMD_QUERY_REGEX_REGISTERS = 0xb07, MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, }; enum { @@ -3150,6 +3151,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_virtio_q_counters_bits { u8 modify_field_select[0x40]; u8 reserved_at_40[0x40]; diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c index 2156fd6643..b120be2d88 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.c +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c @@ -716,6 +716,34 @@ int mlx5dr_cmd_sq_modify_rdy(struct mlx5dr_devx_obj *devx_obj) return ret; } +int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx, + struct mlx5dr_cmd_allow_other_vhca_access_attr *attr) +{ + uint32_t out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0}; + uint32_t in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0}; + void *key; + int ret; + + MLX5_SET(allow_other_vhca_access_in, +in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS); + MLX5_SET(allow_other_vhca_access_in, +in, object_type_to_be_accessed, attr->obj_type); + MLX5_SET(allow_other_vhca_access_in, +in, object_id_to_be_accessed, attr->obj_id); + + key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key); + memcpy(key, attr->access_key, sizeof(attr->access_key)); + + ret = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out)); + if (ret) { + DR_LOG(ERR, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command"); + rte_errno = errno; + return rte_errno; + } + + return 0; +} + int mlx5dr_cmd_query_caps(struct ibv_context *ctx, struct mlx5dr_cmd_query_caps *caps) { diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h index ab61e27fd8..ea6ced9d27 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.h +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h @@ -124,6 +124,12 @@ struct mlx5dr_cmd_sq_create_attr { uint32_t ts_format; }; +struct mlx5dr_cmd_allow_other_vhca_access_attr { + uint16_t obj_type; + uint32_t obj_id; + uint8_t access_key[32]; +}; + struct mlx5dr_cmd_query_ft_caps { uint8_t max_level; uint8_t reparse; @@ -230,4 +236,7 @@ void mlx5dr_cmd_set_attr_connect_miss_tbl(struct mlx5dr_context *ctx, uint32_t fw_ft_type, enum mlx5dr_table_type type, struct mlx5dr_cmd_ft_modify_attr *ft_attr); + +int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx, + struct mlx5dr_cmd_allow_other_vhca_access_attr *attr); #endif /* MLX5DR_CMD_H_ */ -- 2.18.2
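As a usage illustration (hypothetical helper, not part of the patch), granting another VHCA access to a local flow table could look as follows; MLX5_GENERAL_OBJ_TYPE_FT_ALIAS and FT_ID_FT_TYPE_OFFSET come from later patches in this series, and packing the FT type into the object id is an assumption based on them:

#include <string.h>

/* Hypothetical helper: allow a peer VHCA to access a local flow table.
 * The 32-byte access key must match the one the peer later uses when
 * creating its alias object.
 */
static int
share_ft(struct ibv_context *ctx, uint32_t ft_id, uint8_t fw_ft_type,
	 const uint8_t key[32])
{
	struct mlx5dr_cmd_allow_other_vhca_access_attr attr = {0};

	attr.obj_type = MLX5_GENERAL_OBJ_TYPE_FT_ALIAS;
	/* FT ids are assumed to carry the table type in the top byte */
	attr.obj_id = ((uint32_t)fw_ft_type << FT_ID_FT_TYPE_OFFSET) | ft_id;
	memcpy(attr.access_key, key, sizeof(attr.access_key));

	return mlx5dr_cmd_allow_other_vhca_access(ctx, &attr);
}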
[PATCH V1 11/11] net/mlx5/hws: add debug details for cross gvmi
For context, table and matcher. Signed-off-by: Erez Shitrit Signed-off-by: Hamdan Igbaria Reviewed-by: Alex Vesker --- drivers/net/mlx5/hws/mlx5dr_debug.c | 22 -- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_debug.c b/drivers/net/mlx5/hws/mlx5dr_debug.c index 55011c208d..239dfcae46 100644 --- a/drivers/net/mlx5/hws/mlx5dr_debug.c +++ b/drivers/net/mlx5/hws/mlx5dr_debug.c @@ -177,6 +177,7 @@ mlx5dr_debug_dump_matcher_attr(FILE *f, struct mlx5dr_matcher *matcher) static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher) { + bool is_shared = mlx5dr_context_shared_gvmi_used(matcher->tbl->ctx); bool is_root = matcher->tbl->level == MLX5DR_ROOT_LEVEL; enum mlx5dr_table_type tbl_type = matcher->tbl->type; struct mlx5dr_devx_obj *ste_0, *ste_1 = NULL; @@ -205,11 +206,13 @@ static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher) ste_1 = NULL; } - ret = fprintf(f, ",%d,%d,%d,%d", + ret = fprintf(f, ",%d,%d,%d,%d,%d", matcher->match_ste.rtc_0 ? matcher->match_ste.rtc_0->id : 0, ste_0 ? (int)ste_0->id : -1, matcher->match_ste.rtc_1 ? matcher->match_ste.rtc_1->id : 0, - ste_1 ? (int)ste_1->id : -1); + ste_1 ? (int)ste_1->id : -1, + is_shared && !is_root ? + matcher->match_ste.aliased_rtc_0->id : 0); if (ret < 0) goto out_err; @@ -253,18 +256,20 @@ static int mlx5dr_debug_dump_matcher(FILE *f, struct mlx5dr_matcher *matcher) static int mlx5dr_debug_dump_table(FILE *f, struct mlx5dr_table *tbl) { + bool is_shared = mlx5dr_context_shared_gvmi_used(tbl->ctx); bool is_root = tbl->level == MLX5DR_ROOT_LEVEL; struct mlx5dr_matcher *matcher; int ret; - ret = fprintf(f, "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d,%d,%d,%d\n", + ret = fprintf(f, "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d,%d,%d,%d,%d\n", MLX5DR_DEBUG_RES_TYPE_TABLE, (uint64_t)(uintptr_t)tbl, (uint64_t)(uintptr_t)tbl->ctx, is_root ? 0 : tbl->ft->id, tbl->type, is_root ? 0 : tbl->fw_ft_type, - tbl->level); + tbl->level, + is_shared && !is_root ? tbl->local_ft->id : 0); if (ret < 0) { rte_errno = EINVAL; return rte_errno; @@ -383,12 +388,17 @@ static int mlx5dr_debug_dump_context_attr(FILE *f, struct mlx5dr_context *ctx) { int ret; - ret = fprintf(f, "%u,0x%" PRIx64 ",%d,%zu,%d\n", + ret = fprintf(f, "%u,0x%" PRIx64 ",%d,%zu,%d,%s,%d,%d\n", MLX5DR_DEBUG_RES_TYPE_CONTEXT_ATTR, (uint64_t)(uintptr_t)ctx, ctx->pd_num, ctx->queues, - ctx->send_queue->num_entries); + ctx->send_queue->num_entries, + mlx5dr_context_shared_gvmi_used(ctx) ? + mlx5_glue->get_device_name(ctx->ibv_ctx->device) : "None", + ctx->caps->vhca_id, + mlx5dr_context_shared_gvmi_used(ctx) ? + ctx->caps->shared_vhca_id : 0x); if (ret < 0) { rte_errno = EINVAL; return rte_errno; -- 2.18.2
[PATCH V1 07/11] net/mlx5/hws: added command to create alias objects
From: Yevgeny Kliteynik Added support for a command that can create alias objects - objects that are accessed across VHCA. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker --- drivers/common/mlx5/mlx5_prm.h| 23 ++-- drivers/net/mlx5/hws/mlx5dr_cmd.c | 44 +++ drivers/net/mlx5/hws/mlx5dr_cmd.h | 11 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 9d36645949..fd1ad55f1f 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -3135,9 +3135,10 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 obj_id[0x20]; union { struct { - u8 reserved_at_60[0x3]; + u8 alias_object[0x1]; + u8 reserved_at_61[0x2]; u8 log_obj_range[0x5]; - u8 reserved_at_58[0x18]; + u8 reserved_at_68[0x18]; }; u8 obj_offset[0x20]; }; @@ -3234,6 +3235,19 @@ struct mlx5_ifc_rtc_bits { u8 reserved_at_180[0x280]; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + enum mlx5_ifc_stc_action_type { MLX5_IFC_STC_ACTION_TYPE_NOP = 0x00, MLX5_IFC_STC_ACTION_TYPE_COPY = 0x05, @@ -3478,6 +3492,11 @@ struct mlx5_ifc_create_header_modify_pattern_in_bits { struct mlx5_ifc_header_modify_pattern_in_bits pattern; }; +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_CRYPTO_KEY_SIZE_128b = 0x0, MLX5_CRYPTO_KEY_SIZE_256b = 0x1, diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c index b120be2d88..9b9f70c933 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.c +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c @@ -744,6 +744,50 @@ int mlx5dr_cmd_allow_other_vhca_access(struct ibv_context *ctx, return 0; } +struct mlx5dr_devx_obj * +mlx5dr_cmd_alias_obj_create(struct ibv_context *ctx, + struct mlx5dr_cmd_alias_obj_create_attr *alias_attr) +{ + uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + uint32_t in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0}; + struct mlx5dr_devx_obj *devx_obj; + void *attr; + void *key; + + devx_obj = simple_malloc(sizeof(*devx_obj)); + if (!devx_obj) { + DR_LOG(ERR, "Failed to allocate memory for ALIAS general object"); + rte_errno = ENOMEM; + return NULL; + } + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, +attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, +attr, obj_type, alias_attr->obj_type); + MLX5_SET(general_obj_in_cmd_hdr, attr, alias_object, 1); + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx); + MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id); + MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id); + + key = MLX5_ADDR_OF(alias_context, attr, access_key); + memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key)); + + devx_obj->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out, sizeof(out)); + if (!devx_obj->obj) { + DR_LOG(ERR, "Failed to create ALIAS OBJ"); + simple_free(devx_obj); + rte_errno = errno; + return NULL; + } + + devx_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return devx_obj; +} + int mlx5dr_cmd_query_caps(struct ibv_context *ctx, struct mlx5dr_cmd_query_caps *caps) { diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h 
index ea6ced9d27..824ca5e846 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.h +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h @@ -46,6 +46,13 @@ struct mlx5dr_cmd_rtc_create_attr { bool is_jumbo; }; +struct mlx5dr_cmd_alias_obj_create_attr { + uint32_t obj_id; + uint16_t vhca_id; + uint16_t obj_type; + uint8_t access_key[32]; +}; + struct mlx5dr_cmd_stc_create_attr { uint8_t log_obj_range; uint8_t table_type; @@ -217,6 +224,10 @@ mlx5dr_cmd_header_modify_pattern_create(struct ibv_context *ctx, uint32_t pattern_length, uint8_t *actions); +struct mlx5dr_devx_obj * +mlx5dr
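For context, the consumer side (hypothetical helper, not part of the patch): once the owning VHCA has executed ALLOW_OTHER_VHCA_ACCESS with a given key, the peer can create an alias to the object using the same key. The attribute struct and MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS are taken from this series:

#include <string.h>

/* Hypothetical helper: create a local alias to a TIR owned by another
 * VHCA. The command fails unless the owner first allowed access with
 * the same 32-byte key.
 */
static struct mlx5dr_devx_obj *
create_tir_alias(struct ibv_context *local_ctx, uint16_t owner_vhca_id,
		 uint32_t tir_id, const uint8_t key[32])
{
	struct mlx5dr_cmd_alias_obj_create_attr alias_attr = {0};

	alias_attr.obj_type = MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS;
	alias_attr.obj_id = tir_id;
	alias_attr.vhca_id = owner_vhca_id;
	memcpy(alias_attr.access_key, key, sizeof(alias_attr.access_key));

	return mlx5dr_cmd_alias_obj_create(local_ctx, &alias_attr);
}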
[PATCH V1 08/11] net/mlx5/hws: add vhca identifier ID to the caps
And read it in the query_cap function Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker --- drivers/net/mlx5/hws/mlx5dr_cmd.c | 3 +++ drivers/net/mlx5/hws/mlx5dr_cmd.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c index 9b9f70c933..d525867de5 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.c +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c @@ -837,6 +837,9 @@ int mlx5dr_cmd_query_caps(struct ibv_context *ctx, MLX5_GET64(query_hca_cap_out, out, capability.cmd_hca_cap.match_definer_format_supported); + caps->vhca_id = MLX5_GET(query_hca_cap_out, out, +capability.cmd_hca_cap.vhca_id); + caps->sq_ts_format = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.sq_ts_format); diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.h b/drivers/net/mlx5/hws/mlx5dr_cmd.h index 824ca5e846..8b8d5d00b0 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.h +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.h @@ -176,6 +176,7 @@ struct mlx5dr_cmd_query_caps { uint8_t sq_ts_format; uint64_t definer_format_sup; uint32_t trivial_match_definer; + uint32_t vhca_id; bool cross_vhca_resources; char fw_ver[64]; }; -- 2.18.2
[PATCH V1 09/11] net/mlx5/hws: support shared ibv-context with local one
The idea is to have a shared ibv_context on which all the resources are allocated (FT + TIR are exceptions). Whenever a resource is created locally, an alias object to that resource is allocated and used in the other context. The connections between the resources are made according to the type of each resource, either to the original resource or to its alias resource.

Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker
---
 drivers/common/mlx5/mlx5_prm.h | 6 +
 drivers/net/mlx5/hws/mlx5dr.h | 2 +
 drivers/net/mlx5/hws/mlx5dr_action.c | 9 +-
 drivers/net/mlx5/hws/mlx5dr_cmd.c | 19 ++-
 drivers/net/mlx5/hws/mlx5dr_cmd.h | 7 +-
 drivers/net/mlx5/hws/mlx5dr_context.c | 34 +++-
 drivers/net/mlx5/hws/mlx5dr_context.h | 22 +++
 drivers/net/mlx5/hws/mlx5dr_debug.c | 2 +-
 drivers/net/mlx5/hws/mlx5dr_internal.h | 1 +
 drivers/net/mlx5/hws/mlx5dr_matcher.c | 218 +++--
 drivers/net/mlx5/hws/mlx5dr_matcher.h | 9 +
 drivers/net/mlx5/hws/mlx5dr_table.c | 191 +-
 drivers/net/mlx5/hws/mlx5dr_table.h | 4 +-
 13 files changed, 484 insertions(+), 40 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index fd1ad55f1f..a716a33e84 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -291,6 +291,12 @@ #define MAX_ACTIONS_DATA_IN_HEADER_MODIFY 512 +/* Alias FT id passed to the ALLOW_OTHER_VHCA_ACCESS & CREATE_GENERAL_OBJECT + * commands should have the following format: + * {table_type: 8bits, table_id: 24bits}. + */ +#define FT_ID_FT_TYPE_OFFSET 24 + /* Completion mode. */ enum mlx5_completion_mode { MLX5_COMP_ONLY_ERR = 0x0,

diff --git a/drivers/net/mlx5/hws/mlx5dr.h b/drivers/net/mlx5/hws/mlx5dr.h index f8de27c615..aa36e3111f 100644 --- a/drivers/net/mlx5/hws/mlx5dr.h +++ b/drivers/net/mlx5/hws/mlx5dr.h @@ -96,6 +96,8 @@ struct mlx5dr_context_attr { size_t initial_log_ste_memory; /* Currently not in use */ /* Optional PD used for allocating resources */ struct ibv_pd *pd; + /* Optional other ctx for resources allocation, all objects will be created on it */ + struct ibv_context *shared_ibv_ctx; }; struct mlx5dr_table_attr {

diff --git a/drivers/net/mlx5/hws/mlx5dr_action.c b/drivers/net/mlx5/hws/mlx5dr_action.c index b0ae4e7693..da19c1ca7d 100644 --- a/drivers/net/mlx5/hws/mlx5dr_action.c +++ b/drivers/net/mlx5/hws/mlx5dr_action.c @@ -1147,6 +1147,7 @@ mlx5dr_action_create_reformat_root(struct mlx5dr_action *action, { enum mlx5dv_flow_table_type ft_type = 0; /*fix compilation warn*/ uint32_t verb_reformat_type = 0; + struct ibv_context *ibv_ctx; int ret; /* Convert action to FT type and verbs reformat type */ @@ -1157,8 +1158,9 @@ mlx5dr_action_create_reformat_root(struct mlx5dr_action *action, mlx5dr_action_conv_reformat_to_verbs(action->type, &verb_reformat_type); /* Create the reformat type for root table */ + ibv_ctx = mlx5dr_context_get_local_ibv(action->ctx); action->flow_action = - mlx5_glue->dv_create_flow_action_packet_reformat_root(action->ctx->ibv_ctx, + mlx5_glue->dv_create_flow_action_packet_reformat_root(ibv_ctx, data_sz, data, verb_reformat_type, @@ -1496,14 +1498,17 @@ mlx5dr_action_create_modify_header_root(struct mlx5dr_action *action, __be64 *actions) { enum mlx5dv_flow_table_type ft_type = 0; + struct ibv_context *local_ibv_ctx; int ret; ret = mlx5dr_action_conv_flags_to_ft_type(action->flags, &ft_type); if (ret) return rte_errno; + local_ibv_ctx = mlx5dr_context_get_local_ibv(action->ctx); + action->flow_action = - mlx5_glue->dv_create_flow_action_modify_header_root(action->ctx->ibv_ctx, +
mlx5_glue->dv_create_flow_action_modify_header_root(local_ibv_ctx, actions_sz, (uint64_t *)actions, ft_type); diff --git a/drivers/net/mlx5/hws/mlx5dr_cmd.c b/drivers/net/mlx5/hws/mlx5dr_cmd.c index d525867de5..754a424bd7 100644 --- a/drivers/net/mlx5/hws/mlx5dr_cmd.c +++ b/drivers/net/mlx5/hws/mlx5dr_cmd.c @@ -217,18 +217,23 @@ void mlx5dr_cmd_set_attr_connect_miss_tbl(struct mlx5dr_context *ctx, { struct mlx5dr_devx_obj *default_miss_tbl; - if (type != MLX5DR_TABLE_TYPE_FDB) + if (type != MLX5DR_TABLE_TYPE_FDB && !mlx5dr_context_shared_gvmi_used(ctx)) return; - default_miss_tbl = ctx->common_res[type
[PATCH V1 10/11] net/mlx5/hws: support actions while shared resources is used
TIR/FT actions are different in the context of a shared ibv resource: they are created on the local ibv_context and aliased to the shared ibv_context. Other actions should be created on the shared ibv resource; a new flag was added to indicate where this object came from.

Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker
---
 drivers/net/mlx5/hws/mlx5dr.h | 5 ++-
 drivers/net/mlx5/hws/mlx5dr_action.c | 60 ++--
 drivers/net/mlx5/hws/mlx5dr_action.h | 3 ++
 drivers/net/mlx5/mlx5_devx.c | 2 +-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr.h b/drivers/net/mlx5/hws/mlx5dr.h index aa36e3111f..c0f9a5e984 100644 --- a/drivers/net/mlx5/hws/mlx5dr.h +++ b/drivers/net/mlx5/hws/mlx5dr.h @@ -410,12 +410,15 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_context *ctx, * Direct rule TIR devx object. * @param[in] flags * Action creation flags. (enum mlx5dr_action_flags) + * @param[in] is_local + * indicates where the tir object was created, local gvmi or other gvmi * @return pointer to mlx5dr_action on success NULL otherwise. */ struct mlx5dr_action * mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx, struct mlx5dr_devx_obj *obj, - uint32_t flags); + uint32_t flags, + bool is_local); /* Create direct rule TAG action. *

diff --git a/drivers/net/mlx5/hws/mlx5dr_action.c b/drivers/net/mlx5/hws/mlx5dr_action.c index da19c1ca7d..2db62635c1 100644 --- a/drivers/net/mlx5/hws/mlx5dr_action.c +++ b/drivers/net/mlx5/hws/mlx5dr_action.c @@ -744,7 +744,10 @@ mlx5dr_action_create_dest_table(struct mlx5dr_context *ctx, return NULL; if (mlx5dr_action_is_root_flags(flags)) { - action->devx_obj = tbl->ft->obj; + if (mlx5dr_context_shared_gvmi_used(ctx)) + action->devx_obj = tbl->local_ft->obj; + else + action->devx_obj = tbl->ft->obj; } else { ret = mlx5dr_action_create_stcs(action, tbl->ft); if (ret) @@ -758,10 +761,38 @@ mlx5dr_action_create_dest_table(struct mlx5dr_context *ctx, return NULL; } +static int mlx5dr_action_get_dest_tir_obj(struct mlx5dr_context *ctx, + struct mlx5dr_action *action, + struct mlx5dr_devx_obj *obj, + struct mlx5dr_devx_obj **ret_obj) +{ + int ret; + + if (mlx5dr_context_shared_gvmi_used(ctx)) { + ret = mlx5dr_matcher_create_aliased_obj(ctx, + ctx->local_ibv_ctx, + ctx->ibv_ctx, + ctx->caps->vhca_id, + obj->id, + MLX5_GENERAL_OBJ_TYPE_TIR_ALIAS, + &action->alias.devx_obj); + if (ret) { + DR_LOG(ERR, "Failed to create tir alias"); + return rte_errno; + } + *ret_obj = action->alias.devx_obj; + } else { + *ret_obj = obj; + } + + return 0; +} + struct mlx5dr_action * mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx, struct mlx5dr_devx_obj *obj, - uint32_t flags) + uint32_t flags, + bool is_local) { struct mlx5dr_action *action; int ret; @@ -773,6 +804,13 @@ mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx, return NULL; } + if (!is_local) { + DR_LOG(ERR, "TIR should be created on local ibv_device, flags: 0x%x", + flags); + rte_errno = ENOTSUP; + return NULL; + } + action = mlx5dr_action_create_generic(ctx, flags, MLX5DR_ACTION_TYP_TIR); if (!action) return NULL; @@ -780,13 +818,23 @@ mlx5dr_action_create_dest_tir(struct mlx5dr_context *ctx, if (mlx5dr_action_is_root_flags(flags)) { action->devx_obj = obj->obj; } else { + struct mlx5dr_devx_obj *cur_obj = NULL; /*compilation warn*/ + + ret = mlx5dr_action_get_dest_tir_obj(ctx, action, obj, &cur_obj); + if (ret) { + DR_LOG(ERR, "Failed to create tir alias (flags: %d)", flags); goto free_action; + } + + ret =
mlx5dr_action_create_stcs(action, cur_obj); + if (ret) +
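For reference, a sketch of a caller using the updated signature; MLX5DR_ACTION_FLAG_HWS_RX is assumed from the mlx5dr action flags, and the helper itself is illustrative:

/* Sketch: create a TIR destination action under the new signature.
 * The TIR must have been created on the local ibv_device; is_local ==
 * false is rejected with ENOTSUP by this patch.
 */
static struct mlx5dr_action *
make_tir_action(struct mlx5dr_context *ctx, struct mlx5dr_devx_obj *tir)
{
	return mlx5dr_action_create_dest_tir(ctx, tir,
					     MLX5DR_ACTION_FLAG_HWS_RX,
					     true /* is_local */);
}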
RE: [PATCH v2] mempool: micro-optimize put function
PING mempool maintainers. If you don't provide ACK or Review to patches, how should Thomas know that it is ready for merging? -Morten > From: Morten Brørup [mailto:m...@smartsharesystems.com] > Sent: Wednesday, 16 November 2022 18.39 > > > From: Honnappa Nagarahalli [mailto:honnappa.nagaraha...@arm.com] > > Sent: Wednesday, 16 November 2022 17.27 > > > > > > > > > > > > > > > > Micro-optimization: > > > > > Reduced the most likely code path in the generic put function > by > > > > moving an > > > > > unlikely check out of the most likely code path and further > down. > > > > > > > > > > Also updated the comments in the function. > > > > > > > > > > v2 (feedback from Andrew Rybchenko): > > > > > * Modified comparison to prevent overflow if n is really huge > and > > > > > len > > > > is > > > > > non-zero. > > > > > * Added assertion about the invariant preventing overflow in > the > > > > > comparison. > > > > > * Crossing the threshold is not extremely unlikely, so removed > > > > likely() > > > > > from that comparison. > > > > > The compiler will generate code with optimal static branch > > > > prediction > > > > > here anyway. > > > > > > > > > > Signed-off-by: Morten Brørup > > > > > --- > > > > > lib/mempool/rte_mempool.h | 36 --- > -- > > --- > > > > > 1 file changed, 20 insertions(+), 16 deletions(-) > > > > > > > > > > diff --git a/lib/mempool/rte_mempool.h > > b/lib/mempool/rte_mempool.h > > > > > index 9f530db24b..dd1a3177d6 100644 > > > > > --- a/lib/mempool/rte_mempool.h > > > > > +++ b/lib/mempool/rte_mempool.h > > > > > @@ -1364,32 +1364,36 @@ rte_mempool_do_generic_put(struct > > > > > rte_mempool *mp, void * const *obj_table, { > > > > > void **cache_objs; > > > > > > > > > > - /* No cache provided */ > > > > > + /* No cache provided? */ > > > > > if (unlikely(cache == NULL)) > > > > > goto driver_enqueue; > > > > > > > > > > - /* increment stat now, adding in mempool always success */ > > > > > + /* Increment stats now, adding in mempool always succeeds. > > */ > > > > > RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1); > > > > > RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_objs, n); > > > > > > > > > > - /* The request itself is too big for the cache */ > > > > > - if (unlikely(n > cache->flushthresh)) > > > > > - goto driver_enqueue_stats_incremented; > > > > > - > > > > > - /* > > > > > - * The cache follows the following algorithm: > > > > > - * 1. If the objects cannot be added to the cache without > > > > crossing > > > > > - * the flush threshold, flush the cache to the > > backend. > > > > > - * 2. Add the objects to the cache. > > > > > - */ > > > > > + /* Assert the invariant preventing overflow in the > > comparison > > > > below. > > > > > */ > > > > > + RTE_ASSERT(cache->len <= cache->flushthresh); > > > > > > > > > > - if (cache->len + n <= cache->flushthresh) { > > > > > + if (n <= cache->flushthresh - cache->len) { > > > > > + /* > > > > > + * The objects can be added to the cache without > > crossing > > > > the > > > > > + * flush threshold. > > > > > + */ > > > > > cache_objs = &cache->objs[cache->len]; > > > > > cache->len += n; > > > > > - } else { > > > > > + } else if (likely(n <= cache->flushthresh)) { > > > > IMO, this is a misconfiguration on the application part. In the > > PMDs I > > > > have looked at, max value of 'n' is controlled by compile time > > > > constants. Application could do a compile time check on the cache > > > > threshold or we could have another RTE_ASSERT on this. 
> > > > > > There could be applications using a mempool for something else than > > mbufs. > > Agree > > > > > > > > In that case, the application should be allowed to get/put many > > objects in > > > one transaction. > > Still, this is a misconfiguration on the application. On one hand the > > threshold is configured for 'x' but they are sending a request which > is > > more than 'x'. It should be possible to change the threshold > > configuration or reduce the request size. > > > > If the application does not fix the misconfiguration, it is possible > > that it will always hit this case and does not get the benefit of > using > > the per-core cache. > > Correct. I suppose this is the intended behavior of this API. > > The zero-copy API proposed in another patch [1] has stricter > requirements to the bulk size. > > [1]: http://inbox.dpdk.org/dev/20221115161822.70886-1- > m...@smartsharesystems.com/T/#u > > > > > With this check, we are introducing an additional memcpy as well. I > am > > not sure if reusing the latest buffers is better than having an > memcpy. > > There is no additional memcpy. The large bulk transfer is stored > directly in the backend pool, bypassing the mempool cache. > > Please note that this check is no
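For readers following the threshold discussion, a minimal sketch (illustrative names, not the actual mempool code) of why the patch tests n against (flushthresh - len) instead of computing (len + n):

#include <assert.h>
#include <stdint.h>

/* With unsigned arithmetic, len + n can wrap around for a huge n and
 * falsely pass the threshold test. flushthresh - len cannot wrap,
 * because the invariant len <= flushthresh always holds.
 */
static inline int
fits_in_cache(uint32_t len, uint32_t flushthresh, uint32_t n)
{
	assert(len <= flushthresh);	/* invariant asserted in the patch */
	return n <= flushthresh - len;
}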
RE: [PATCH v19.11 LTS] kni: fix build for Suse
> From: Ferruh Yigit > Sent: 2022年12月16日 20:11 > To: Thomas Monjalon > Cc: dev@dpdk.org; David Marchand ; > sta...@dpdk.org; christian.ehrha...@canonical.com; Gao, DaxueX > > Subject: [PATCH v19.11 LTS] kni: fix build for Suse > > Wrong macro is used in the patch that detects if '.ndo_tx_timeout' > has 'txqueue' parameter or not, which is causing build error. > > Fixed by using correct macro. > > Bugzilla ID: 1141 > Fixes: d43fa3e198c0 ("kni: fix build for SLES15-SP3 (Make)") > Cc: sta...@dpdk.org > > Signed-off-by: Ferruh Yigit Tested-by: Daxue Gao
[PATCH v5 0/5] support dmadev/ethdev stats reset
This patchset adds stats reset for dmadev and ethdev telemetry, and also supports hiding zero values for ethdev xstats.

Chengwen Feng (5):
  dmadev: support stats reset telemetry command
  telemetry: fix repeated display when callback don't set dict
  ethdev: support xstats reset telemetry command
  ethdev: telemetry xstats support hide zero
  ethdev: add newline to telemetry log string

 lib/dmadev/rte_dmadev.c | 40 +++
 lib/ethdev/rte_ethdev.c | 57 ---
 lib/telemetry/telemetry.c | 4 ++-
 3 files changed, 91 insertions(+), 10 deletions(-)

-- 
2.17.1
[PATCH v5 1/5] dmadev: support stats reset telemetry command
The stats reset is useful for debugging, so add it to the dmadev telemetry command lists. Signed-off-by: Chengwen Feng --- lib/dmadev/rte_dmadev.c | 40 1 file changed, 40 insertions(+) diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c index 4da653eec7..93e15d4972 100644 --- a/lib/dmadev/rte_dmadev.c +++ b/lib/dmadev/rte_dmadev.c @@ -994,6 +994,44 @@ dmadev_handle_dev_stats(const char *cmd __rte_unused, return 0; } +static int +dmadev_handle_dev_stats_reset(const char *cmd __rte_unused, + const char *params, + struct rte_tel_data *d __rte_unused) +{ + struct rte_dma_info dma_info; + int dev_id, ret, vchan_id; + const char *vchan_param; + char *end_param; + + if (params == NULL || strlen(params) == 0 || !isdigit(*params)) + return -EINVAL; + + dev_id = strtoul(params, &end_param, 0); + + /* Function info_get validates dev_id so we don't need to. */ + ret = rte_dma_info_get(dev_id, &dma_info); + if (ret < 0) + return -EINVAL; + + /* If the device has one vchan the user does not need to supply the +* vchan id and only the device id is needed, no extra parameters. +*/ + if (dma_info.nb_vchans == 1 && *end_param == '\0') + vchan_id = 0; + else { + vchan_param = strtok(end_param, ","); + if (!vchan_param || strlen(vchan_param) == 0 || !isdigit(*vchan_param)) + return -EINVAL; + + vchan_id = strtoul(vchan_param, &end_param, 0); + } + if (*end_param != '\0') + RTE_DMA_LOG(WARNING, "Extra parameters passed to dmadev telemetry command, ignoring"); + + return rte_dma_stats_reset(dev_id, vchan_id); +} + #ifndef RTE_EXEC_ENV_WINDOWS static int dmadev_handle_dev_dump(const char *cmd __rte_unused, @@ -1041,6 +1079,8 @@ RTE_INIT(dmadev_init_telemetry) "Returns information for a dmadev. Parameters: int dev_id"); rte_telemetry_register_cmd("/dmadev/stats", dmadev_handle_dev_stats, "Returns the stats for a dmadev vchannel. Parameters: int dev_id, vchan_id (Optional if only one vchannel)"); + rte_telemetry_register_cmd("/dmadev/stats_reset", dmadev_handle_dev_stats_reset, + "Reset the stats for a dmadev vchannel. Parameters: int dev_id, vchan_id (Optional if only one vchannel)"); #ifndef RTE_EXEC_ENV_WINDOWS rte_telemetry_register_cmd("/dmadev/dump", dmadev_handle_dev_dump, "Returns dump information for a dmadev. Parameters: int dev_id"); -- 2.17.1
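For context, a sketch of the existing dmadev calls the new telemetry command maps to; rte_dma_stats_reset() and rte_dma_stats_get() are the public dmadev APIs, and the device/vchan ids are illustrative:

#include <rte_dmadev.h>

/* Reset the stats of one vchan and confirm they are zeroed; this is
 * what e.g. /dmadev/stats_reset,0,0 performs through telemetry.
 */
static void
reset_vchan_stats(int16_t dev_id, uint16_t vchan)
{
	struct rte_dma_stats stats;

	rte_dma_stats_reset(dev_id, vchan);
	rte_dma_stats_get(dev_id, vchan, &stats);
	/* stats.submitted, stats.completed and stats.errors are now 0 */
}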
[PATCH v5 2/5] telemetry: fix repeated display when callback don't set dict
When a telemetry callback doesn't set the dict and returns a non-negative number, telemetry repeatedly displays the last result.

Fixes: 6dd571fd07c3 ("telemetry: introduce new functionality")
Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng
---
 lib/telemetry/telemetry.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c index 8fbb4f3060..a6c84e3499 100644 --- a/lib/telemetry/telemetry.c +++ b/lib/telemetry/telemetry.c @@ -334,8 +334,10 @@ static void perform_command(telemetry_cb fn, const char *cmd, const char *param, int s) { struct rte_tel_data data; + int ret; - int ret = fn(cmd, param, &data); + rte_tel_data_start_dict(&data); + ret = fn(cmd, param, &data); if (ret < 0) { char out_buf[MAX_CMD_LEN + 10]; int used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":null}", -- 2.17.1
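To see how the bug triggers, consider a hypothetical callback like the one below: it reports success but never writes to the data object, so before this fix the output buffer kept whatever the previous command serialized and the old result was sent again:

#include <rte_common.h>
#include <rte_telemetry.h>

/* Hypothetical callback demonstrating the bug: returns success without
 * touching 'd', leaving stale contents from the previous command.
 */
static int
handle_noop(const char *cmd __rte_unused, const char *params __rte_unused,
	    struct rte_tel_data *d __rte_unused)
{
	/* missing: rte_tel_data_start_dict(d) or any rte_tel_data_add_*() */
	return 0;
}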
[PATCH v5 3/5] ethdev: support xstats reset telemetry command
The xstats reset is useful for debugging, so add it to the ethdev telemetry command lists. Signed-off-by: Chengwen Feng --- lib/ethdev/rte_ethdev.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index 5d5e18db1e..0774de81b0 100644 --- a/lib/ethdev/rte_ethdev.c +++ b/lib/ethdev/rte_ethdev.c @@ -5915,6 +5915,27 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused, return 0; } +static int +eth_dev_handle_port_xstats_reset(const char *cmd __rte_unused, + const char *params, + struct rte_tel_data *d __rte_unused) +{ + char *end_param; + int port_id; + + if (params == NULL || strlen(params) == 0 || !isdigit(*params)) + return -1; + + port_id = strtoul(params, &end_param, 0); + if (*end_param != '\0') + RTE_ETHDEV_LOG(NOTICE, + "Extra parameters passed to ethdev telemetry command, ignoring\n"); + if (!rte_eth_dev_is_valid_port(port_id)) + return -1; + + return rte_eth_xstats_reset(port_id); +} + #ifndef RTE_EXEC_ENV_WINDOWS static int eth_dev_handle_port_dump_priv(const char *cmd __rte_unused, @@ -6328,6 +6349,8 @@ RTE_INIT(ethdev_init_telemetry) "Returns the common stats for a port. Parameters: int port_id"); rte_telemetry_register_cmd("/ethdev/xstats", eth_dev_handle_port_xstats, "Returns the extended stats for a port. Parameters: int port_id"); + rte_telemetry_register_cmd("/ethdev/xstats_reset", eth_dev_handle_port_xstats_reset, + "Reset the extended stats for a port. Parameters: int port_id"); #ifndef RTE_EXEC_ENV_WINDOWS rte_telemetry_register_cmd("/ethdev/dump_priv", eth_dev_handle_port_dump_priv, "Returns dump private information for a port. Parameters: int port_id"); -- 2.17.1
[PATCH v5 5/5] ethdev: add newline to telemetry log string
The telemetry-related code may invoke RTE_ETHDEV_LOG to display information. Since RTE_ETHDEV_LOG does not add a newline character automatically, the newline character must be explicitly added to the log string.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng
---
 lib/ethdev/rte_ethdev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index 2075086eeb..30f79b7d4c 100644 --- a/lib/ethdev/rte_ethdev.c +++ b/lib/ethdev/rte_ethdev.c @@ -5968,7 +5968,7 @@ eth_dev_handle_port_dump_priv(const char *cmd __rte_unused, port_id = strtoul(params, &end_param, 0); if (*end_param != '\0') RTE_ETHDEV_LOG(NOTICE, - "Extra parameters passed to ethdev telemetry command, ignoring"); + "Extra parameters passed to ethdev telemetry command, ignoring\n"); if (!rte_eth_dev_is_valid_port(port_id)) return -EINVAL; @@ -6010,7 +6010,7 @@ eth_dev_handle_port_link_status(const char *cmd __rte_unused, port_id = strtoul(params, &end_param, 0); if (*end_param != '\0') RTE_ETHDEV_LOG(NOTICE, - "Extra parameters passed to ethdev telemetry command, ignoring"); + "Extra parameters passed to ethdev telemetry command, ignoring\n"); if (!rte_eth_dev_is_valid_port(port_id)) return -1; @@ -6048,7 +6048,7 @@ eth_dev_handle_port_info(const char *cmd __rte_unused, port_id = strtoul(params, &end_param, 0); if (*end_param != '\0') RTE_ETHDEV_LOG(NOTICE, - "Extra parameters passed to ethdev telemetry command, ignoring"); + "Extra parameters passed to ethdev telemetry command, ignoring\n"); if (!rte_eth_dev_is_valid_port(port_id)) return -EINVAL; -- 2.17.1
[PATCH v5 4/5] ethdev: telemetry xstats support hide zero
The number of xstats may be large. Add a hide-zero option so that only non-zero values are displayed.

Signed-off-by: Chengwen Feng
---
 lib/ethdev/rte_ethdev.c | 28 ++--
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index 0774de81b0..2075086eeb 100644 --- a/lib/ethdev/rte_ethdev.c +++ b/lib/ethdev/rte_ethdev.c @@ -5870,20 +5870,33 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused, { struct rte_eth_xstat *eth_xstats; struct rte_eth_xstat_name *xstat_names; + char *end_param, *hide_param; int port_id, num_xstats; + int hide_zero = 0; int i, ret; - char *end_param; if (params == NULL || strlen(params) == 0 || !isdigit(*params)) return -1; port_id = strtoul(params, &end_param, 0); - if (*end_param != '\0') - RTE_ETHDEV_LOG(NOTICE, - "Extra parameters passed to ethdev telemetry command, ignoring"); if (!rte_eth_dev_is_valid_port(port_id)) return -1; + if (*end_param != '\0') { + hide_param = strtok(end_param, ","); + if (!hide_param || strlen(hide_param) == 0 || !isdigit(*hide_param)) + return -EINVAL; + hide_zero = strtoul(hide_param, &end_param, 0); + if (*end_param != '\0') + RTE_ETHDEV_LOG(NOTICE, + "Extra parameters passed to ethdev telemetry command, ignoring\n"); + if (hide_zero != 0 && hide_zero != 1) { + hide_zero = !!hide_zero; + RTE_ETHDEV_LOG(NOTICE, + "Hide zero parameter is non-boolean, cast to boolean\n"); + } + } + num_xstats = rte_eth_xstats_get(port_id, NULL, 0); if (num_xstats < 0) return -1; @@ -5908,9 +5921,12 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused, } rte_tel_data_start_dict(d); - for (i = 0; i < num_xstats; i++) + for (i = 0; i < num_xstats; i++) { + if (hide_zero && eth_xstats[i].value == 0) + continue; rte_tel_data_add_dict_u64(d, xstat_names[i].name, eth_xstats[i].value); + } free(eth_xstats); return 0; } @@ -6348,7 +6364,7 @@ RTE_INIT(ethdev_init_telemetry) rte_telemetry_register_cmd("/ethdev/stats", eth_dev_handle_port_stats, "Returns the common stats for a port. Parameters: int port_id"); rte_telemetry_register_cmd("/ethdev/xstats", eth_dev_handle_port_xstats, - "Returns the extended stats for a port. Parameters: int port_id"); + "Returns the extended stats for a port. Parameters: int port_id, hide_zero (Optional, specify whether to hide zero values)"); rte_telemetry_register_cmd("/ethdev/xstats_reset", eth_dev_handle_port_xstats_reset, "Reset the extended stats for a port. Parameters: int port_id"); #ifndef RTE_EXEC_ENV_WINDOWS -- 2.17.1
Re: [PATCH V8 6/8] telemetry: support adding integer value as hexadecimal
On Mon, Dec 19, 2022 at 03:06:46PM +0800, Huisong Li wrote: > Sometimes displaying a unsigned integer value as hexadecimal encoded style > is more expected for human consumption, such as, offload capability and > device flag. This patch introduces two APIs to add unsigned integer value > as hexadecimal encoded string to array or dictionary. And user can choose > whether the stored value is padding to the specified width. > > Signed-off-by: Huisong Li > Acked-by: Morten Brørup > Acked-by: Chengwen Feng Acked-by: Bruce Richardson
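For readers of the thread, a sketch of how such an API could be used in a telemetry callback; the function name rte_tel_data_add_dict_uint_hex and its val_bits padding parameter are assumptions based on the patch description, not confirmed signatures:

#include <stdint.h>
#include <rte_common.h>
#include <rte_telemetry.h>

/* Sketch, assuming the hex-add API from this patch: report a capability
 * mask as a zero-padded 64-bit hexadecimal string. The value is
 * illustrative only.
 */
static int
handle_caps(const char *cmd __rte_unused, const char *params __rte_unused,
	    struct rte_tel_data *d)
{
	uint64_t caps = 0x10007ULL;

	rte_tel_data_start_dict(d);
	/* val_bits = 64 requests padding to 16 hex digits (assumed) */
	rte_tel_data_add_dict_uint_hex(d, "rx_offload_capa", caps, 64);
	return 0;
}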
Re: NIC support for IOVA as VA
Hi Morten,

+1 for the features. Also, since this is a compilation option, can you add compilation and related test cases to the DPDK CI? That way we can check whether a patch affects the code (IOVA_AS_PA).

On 2022/12/7 16:06, Morten Brørup wrote:
> Should support for IOVA as VA be listed under NIC features [1]?
>
> [1]: http://doc.dpdk.org/guides/nics/
>
> Med venlig hilsen / Kind regards,
> -Morten Brørup
Re: [PATCH v5 2/5] telemetry: fix repeated display when callback don't set dict
On Mon, Dec 19, 2022 at 09:07:20AM +, Chengwen Feng wrote: > When telemetry callback didn't set dict and return a non-negative > number, the telemetry will repeat to display the last result. > > Fixes: 6dd571fd07c3 ("telemetry: introduce new functionality") > Cc: sta...@dpdk.org > > Signed-off-by: Chengwen Feng > --- Hi Chengwen, I'm a little curious about this bug. Can you describe some steps to reproduce it as I'm curious as to exactly what is happening. The fix seems a little strange to me so I'd like to investigate a little more to see if other approaches might work. Thanks, /Bruce
Re: [RFC PATCH 2/7] telemetry: add uint type as alias for u64
15/12/2022 14:58, Bruce Richardson: > On Thu, Dec 15, 2022 at 02:36:51PM +0100, Thomas Monjalon wrote: > > 15/12/2022 10:44, Bruce Richardson: > > > On Wed, Dec 14, 2022 at 09:38:45AM -0800, Tyler Retzlaff wrote: > > > > On Tue, Dec 13, 2022 at 06:27:25PM +, Bruce Richardson wrote: > > > > > For future standardization on the "uint" name for unsigned values > > > > > rather > > > > > than the existing "u64" one, we can for now: > > > > > * rename all internal values to use uint rather than u64 > > > > > * add new function names to alias the existing u64 ones > > > > > > > > > > Suggested-by: Morten Brørup > > > > > Signed-off-by: Bruce Richardson > > > > > > > > when adding __rte_experimental api i have been asked to add the > > > > following boilerplate documentation block. i'm not pushing it, just > > > > recalling it is what i get asked for, so in case it's something we do? > > > > see lib/eal/include/rte_thread.h as an example > > > > > > > > > > > > ``` > > > > * @warning > > > > * @b EXPERIMENTAL: this API may change without prior notice. > > > > ``` > > > > > > > > > > Ok, thanks for the notice. > > > > > > Actually, related to this, since we are adding these functions as aliases > > > for existing stable functions, I would like to see these being added not > > > as > > > experimental. The reason for that, is that while they are experimental, we > > > cannot feasibly mark the old function names as deprecated. :-( > > > > > > Adding Thomas and David on CC for their thoughts. > > > > Is it related to telemetry? > > > > In general, yes we cannot deprecate something if there is no stable > > replacement. > > The recommended step is to introduce a new experimental API > > and deprecate the old one when the new API is stable. > > > Yes, understood. > What we are really trying to do here is to rename an API, by process of > adding the new API and then marking the old one as deprecated. The small > issue is that adding the new one it is by default experimental, meaning we > need to wait for deprecating old one. Ideally, as soon as the new API is > added, we would like to point people to use that, but can't really do so > while it is experimental. > > --- > > By way of explicit detail, Morten pointed out the inconsistency in the > telemetry APIs and types: > > * we have add_*_int, which takes a 32-bit signed value > * we have add_*_u64 which takes 64-bit unsigned (as name suggests). > > The ideal end-state is to always use 64-bit values (since there is no space > saving from 32-bit as a union is used), and just name everything as "int" > or "uint" for signed/unsigned. The two big steps here are: > > * expanding type of the "int" functions to take 64-bit parameters - this is > ABI change but not API one, since existing code will happily promote > values on compile. Therefore, we just use ABI versioning to have a 32-bit > version for older linked binaries. > * the rename of the rte_tel_data_add_array_u64 and > rte_tel_data_add_dict_u64 to *_uint variants. Though keeping > compatibility is easier, as we can just add new functions, the overall > process is slower since the new functions technically should be added as > experimental - hence the email. For the case of function renaming, do we > still need to have the "renamed" versions as experimental initially? If a function is simply renamed, I think there is no need for the experimental step. Would we keep an alias with the old name for some time?
Re: 21.11.3 patches review and test
On 15/12/2022 17:51, Ali Alnubani wrote:

-Original Message-
From: Kevin Traynor
Sent: Tuesday, December 6, 2022 1:30 PM
To: sta...@dpdk.org
Cc: dev@dpdk.org; Abhishek Marathe ; Ali Alnubani ; benjamin.wal...@intel.com; David Christensen ; Hemant Agrawal ; Ian Stokes ; Jerin Jacob ; John McNamara ; Ju-Hyoung Lee ; Kevin Traynor ; Luca Boccassi ; Pei Zhang ; qian.q...@intel.com; Raslan Darawsheh ; NBU-Contact-Thomas Monjalon (EXTERNAL) ; yangh...@redhat.com; yuan.p...@intel.com; zhaoyan.c...@intel.com
Subject: 21.11.3 patches review and test

Hi all,

Here is a list of patches targeted for stable release 21.11.3. The planned date for the final release is 19th December.

Please help with testing and validation of your use cases and report any issues/results with reply-all to this mail. For the final release the fixes and reported validations will be added to the release notes.

A release candidate tarball can be found at:
https://dpdk.org/browse/dpdk-stable/tag/?id=v21.11.3-rc1

These patches are located at branch 21.11 of dpdk-stable repo:
https://dpdk.org/browse/dpdk-stable/

Thanks.
Kevin

---

Hello,

We ran the following functional tests with Nvidia hardware on 21.11.3-rc1:
- Basic functionality: Send and receive multiple types of traffic.
- testpmd xstats counter test.
- testpmd timestamp test.
- Changing/checking link status through testpmd.
- RTE flow tests:
  Items:
  - ecpri
  - eth
  - flex
  - geneve
  - geneve_opt
  - gre
  - gre_key
  - gre_option
  - gtp
  - gtp_psc
  - icmp
  - icmp6
  - integrity
  - ipv4
  - ipv6
  - ipv6_frag_ext
  - mark
  - meta
  - mpls
  - nvgre
  - tag
  - tcp
  - udp
  - vlan
  - vxlan
  - vxlan_gpe
  Actions:
  - age
  - count
  - dec_tcp_ack
  - dec_tcp_seq
  - dec_ttl
  - drop
  - flag
  - inc_tcp_ack
  - inc_tcp_seq
  - jump
  - mark
  - meter
  - modify_field
  - nvgre_decap
  - nvgre_encap
  - of_pop_vlan
  - of_push_vlan
  - of_set_vlan_pcp
  - of_set_vlan_vid
  - queue
  - raw_decap
  - raw_encap
  - rss
  - sample
  - set_ipv4_dscp
  - set_ipv4_dst
  - set_ipv4_src
  - set_ipv6_dscp
  - set_ipv6_dst
  - set_ipv6_src
  - set_mac_dst
  - set_mac_src
  - set_meta
  - set_tag
  - set_tp_dst
  - set_tp_src
  - set_ttl
  - vxlan_decap
  - vxlan_encap
- Some RSS tests.
- VLAN filtering, stripping and insertion tests.
- Checksum and TSO tests.
- ptype tests.
- link_status_interrupt example application tests.
- l3fwd-power example application tests.
- Multi-process example applications tests.
- Hardware LRO tests.
- Regex application tests.
- Buffer Split tests.
- Tx scheduling tests.

Functional tests ran on:
- NIC: ConnectX-4 Lx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.8-1.0.1.1 / Firmware: 14.32.1010
- NIC: ConnectX-5 / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.8-1.0.1.1 / Firmware: 16.35.1012
- NIC: ConnectX-6 Dx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.7-1.0.2.0 / Firmware: 22.35.1012
- DPU: BlueField-2 / DOCA SW version: 1.5.0 / Firmware: 24.35.1012

We don't see any new issues in functional testing caused by changes in this release.

Additionally, we ran compilation tests with multiple configurations on the following OS/driver combinations:
- Ubuntu 22.04.1 with MLNX_OFED_LINUX-5.8-1.1.2.1.
- Ubuntu 20.04.5 with MLNX_OFED_LINUX-5.8-1.1.2.1.
- Ubuntu 20.04.5 with rdma-core master (76cfaa1).
- Ubuntu 20.04.5 with rdma-core v28.0.
- Ubuntu 18.04.6 with rdma-core v17.1.
- Ubuntu 18.04.6 with rdma-core master (76cfaa1) (i386).
- Ubuntu 16.04.7 with rdma-core v22.7.
- Fedora 37 with rdma-core v41.0.
- Fedora 38 (Rawhide) with rdma-core v41.0.
- CentOS 7 7.9.2009 with rdma-core master (76cfaa1).
- CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.8-1.0.1.1.
- CentOS 8 8.4.2105 with rdma-core master (76cfaa1).
- OpenSUSE Leap 15.4 with rdma-core v38.1.
- Windows Server 2019 with Clang 11.0.0.

The builds are passing on most OS except for Fedora 37 and 38, where I have the following issues:
- Bug 1149 - [21.11] lib/ring build failure with gcc 12 and debug enabled (https://bugs.dpdk.org/show_bug.cgi?id=1149)
- Bug 1150 - [21.11] failure to build API's html docs on Fedora 37 (https://bugs.dpdk.org/show_bug.cgi?id=1150)

Thanks for testing Ali. I will take a look at these.
Kevin.

Thanks,
Ali
[dpdk-dev] [PATCH] net/cnxk: support flow info API
From: Satheesh Paul Implement rte_flow_info_get API to get the maximum number of counters and meters. Signed-off-by: Satheesh Paul Reviewed-by: Kiran Kumar K --- Depends-on: patch-26075 ("common/cnxk: fix dual VLAN parsing issue") drivers/net/cnxk/cn10k_ethdev.c| 1 + drivers/net/cnxk/cn10k_flow.c | 16 drivers/net/cnxk/cn10k_flow.h | 8 ++-- drivers/net/cnxk/cn9k_ethdev.c | 1 + drivers/net/cnxk/cn9k_flow.c | 15 +++ drivers/net/cnxk/cn9k_flow.h | 10 ++ drivers/net/cnxk/cnxk_ethdev.h | 1 + drivers/net/cnxk/cnxk_ethdev_mtr.c | 1 - 8 files changed, 46 insertions(+), 7 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index 4658713591..ef1637ec08 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -754,6 +754,7 @@ npc_flow_ops_override(void) /* Update platform specific ops */ cnxk_flow_ops.create = cn10k_flow_create; cnxk_flow_ops.destroy = cn10k_flow_destroy; + cnxk_flow_ops.info_get = cn10k_flow_info_get; } static int diff --git a/drivers/net/cnxk/cn10k_flow.c b/drivers/net/cnxk/cn10k_flow.c index 7df879a2bb..2ce9e1de74 100644 --- a/drivers/net/cnxk/cn10k_flow.c +++ b/drivers/net/cnxk/cn10k_flow.c @@ -207,6 +207,22 @@ cn10k_flow_create(struct rte_eth_dev *eth_dev, const struct rte_flow_attr *attr, return (struct rte_flow *)flow; } +int +cn10k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info *port_info, + struct rte_flow_queue_info *queue_info, struct rte_flow_error *err) +{ + RTE_SET_USED(dev); + RTE_SET_USED(err); + + memset(port_info, 0, sizeof(*port_info)); + memset(queue_info, 0, sizeof(*queue_info)); + + port_info->max_nb_counters = CN10K_NPC_COUNTERS_MAX; + port_info->max_nb_meters = CNXK_NIX_MTR_COUNT_MAX; + + return 0; +} + int cn10k_flow_destroy(struct rte_eth_dev *eth_dev, struct rte_flow *rte_flow, struct rte_flow_error *error) diff --git a/drivers/net/cnxk/cn10k_flow.h b/drivers/net/cnxk/cn10k_flow.h index f64fcf2a5e..316b74e6a6 100644 --- a/drivers/net/cnxk/cn10k_flow.h +++ b/drivers/net/cnxk/cn10k_flow.h @@ -6,12 +6,16 @@ #include -struct rte_flow *cn10k_flow_create(struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, +struct rte_flow *cn10k_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], struct rte_flow_error *error); int cn10k_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, struct rte_flow_error *error); +int cn10k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info *port_info, + struct rte_flow_queue_info *queue_info, struct rte_flow_error *err); + +#define CN10K_NPC_COUNTERS_MAX 512 + #endif /* __CN10K_RTE_FLOW_H__ */ diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 3b702d9696..f08e986bd1 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -674,6 +674,7 @@ npc_flow_ops_override(void) /* Update platform specific ops */ cnxk_flow_ops.create = cn9k_flow_create; cnxk_flow_ops.destroy = cn9k_flow_destroy; + cnxk_flow_ops.info_get = cn9k_flow_info_get; } static int diff --git a/drivers/net/cnxk/cn9k_flow.c b/drivers/net/cnxk/cn9k_flow.c index 15ccdf8919..a418af185d 100644 --- a/drivers/net/cnxk/cn9k_flow.c +++ b/drivers/net/cnxk/cn9k_flow.c @@ -54,3 +54,18 @@ cn9k_flow_destroy(struct rte_eth_dev *eth_dev, struct rte_flow *rte_flow, return cnxk_flow_destroy(eth_dev, flow, error); } + +int +cn9k_flow_info_get(struct rte_eth_dev *dev, struct rte_flow_port_info *port_info, + struct 
rte_flow_queue_info *queue_info, struct rte_flow_error *err) +{ + RTE_SET_USED(dev); + RTE_SET_USED(err); + + memset(port_info, 0, sizeof(*port_info)); + memset(queue_info, 0, sizeof(*queue_info)); + + port_info->max_nb_counters = CN9K_NPC_COUNTERS_MAX; + + return 0; +} diff --git a/drivers/net/cnxk/cn9k_flow.h b/drivers/net/cnxk/cn9k_flow.h index 43d59e1eb2..26f93ea204 100644 --- a/drivers/net/cnxk/cn9k_flow.h +++ b/drivers/net/cnxk/cn9k_flow.h @@ -6,12 +6,14 @@ #include -struct rte_flow *cn9k_flow_create(struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, +struct rte_flow *cn9k_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item pattern[], const struct rte_flow_action actions[], struct rte_flow_error *error); -int cn9k_flow_destroy(
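For reference, a sketch of how an application reaches the new driver callback through the generic flow API; rte_flow_info_get() is the existing ethdev entry point, and the port id handling is illustrative:

#include <stdio.h>
#include <rte_flow.h>

/* Query the port's flow engine limits; on cnxk this now reports the
 * maximum number of counters (and meters on cn10k).
 */
static void
print_flow_limits(uint16_t port_id)
{
	struct rte_flow_port_info port_info;
	struct rte_flow_queue_info queue_info;
	struct rte_flow_error err;

	if (rte_flow_info_get(port_id, &port_info, &queue_info, &err) == 0)
		printf("max counters: %u, max meters: %u\n",
		       port_info.max_nb_counters, port_info.max_nb_meters);
}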
Re: [RFC PATCH 2/7] telemetry: add uint type as alias for u64
On Mon, Dec 19, 2022 at 11:37:19AM +0100, Thomas Monjalon wrote: > 15/12/2022 14:58, Bruce Richardson: > > On Thu, Dec 15, 2022 at 02:36:51PM +0100, Thomas Monjalon wrote: > > > 15/12/2022 10:44, Bruce Richardson: > > > > On Wed, Dec 14, 2022 at 09:38:45AM -0800, Tyler Retzlaff wrote: > > > > > On Tue, Dec 13, 2022 at 06:27:25PM +, Bruce Richardson wrote: > > > > > > For future standardization on the "uint" name for unsigned values > > > > > > rather > > > > > > than the existing "u64" one, we can for now: > > > > > > * rename all internal values to use uint rather than u64 > > > > > > * add new function names to alias the existing u64 ones > > > > > > > > > > > > Suggested-by: Morten Brørup > > > > > > Signed-off-by: Bruce Richardson > > > > > > > > > > when adding __rte_experimental api i have been asked to add the > > > > > following boilerplate documentation block. i'm not pushing it, just > > > > > recalling it is what i get asked for, so in case it's something we do? > > > > > see lib/eal/include/rte_thread.h as an example > > > > > > > > > > > > > > > ``` > > > > > * @warning > > > > > * @b EXPERIMENTAL: this API may change without prior notice. > > > > > ``` > > > > > > > > > > > > > Ok, thanks for the notice. > > > > > > > > Actually, related to this, since we are adding these functions as > > > > aliases > > > > for existing stable functions, I would like to see these being added > > > > not as > > > > experimental. The reason for that, is that while they are experimental, > > > > we > > > > cannot feasibly mark the old function names as deprecated. :-( > > > > > > > > Adding Thomas and David on CC for their thoughts. > > > > > > Is it related to telemetry? > > > > > > In general, yes we cannot deprecate something if there is no stable > > > replacement. > > > The recommended step is to introduce a new experimental API > > > and deprecate the old one when the new API is stable. > > > > > Yes, understood. > > What we are really trying to do here is to rename an API, by process of > > adding the new API and then marking the old one as deprecated. The small > > issue is that adding the new one it is by default experimental, meaning we > > need to wait for deprecating old one. Ideally, as soon as the new API is > > added, we would like to point people to use that, but can't really do so > > while it is experimental. > > > > --- > > > > By way of explicit detail, Morten pointed out the inconsistency in the > > telemetry APIs and types: > > > > * we have add_*_int, which takes a 32-bit signed value > > * we have add_*_u64 which takes 64-bit unsigned (as name suggests). > > > > The ideal end-state is to always use 64-bit values (since there is no space > > saving from 32-bit as a union is used), and just name everything as "int" > > or "uint" for signed/unsigned. The two big steps here are: > > > > * expanding type of the "int" functions to take 64-bit parameters - this is > > ABI change but not API one, since existing code will happily promote > > values on compile. Therefore, we just use ABI versioning to have a 32-bit > > version for older linked binaries. > > * the rename of the rte_tel_data_add_array_u64 and > > rte_tel_data_add_dict_u64 to *_uint variants. Though keeping > > compatibility is easier, as we can just add new functions, the overall > > process is slower since the new functions technically should be added as > > experimental - hence the email. For the case of function renaming, do we > > still need to have the "renamed" versions as experimental initially? 
> > If a function is simply renamed, I think there is no need for the > experimental step. > Would we keep an alias with the old name for some time? > Yes, the old name should go through the deprecation process. No hurry with removal. /Bruce
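For illustration, the rename-by-alias being discussed could be as thin as the sketch below (illustrative only; the committed form may differ, e.g. by using symbol versioning instead of a wrapper):

#include <rte_telemetry.h>

/* The new canonical *_uint name simply forwards to the existing *_u64
 * implementation, so behaviour and ABI stay unchanged while the old
 * name goes through deprecation. */
int
rte_tel_data_add_dict_uint(struct rte_tel_data *d, const char *name,
		uint64_t val)
{
	return rte_tel_data_add_dict_u64(d, name, val);
}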
Re: [RFC] eal: per-thread constructors/destructors
On 2022-12-18 11:01, Morten Brørup wrote: > This RFC introduces per-thread constructors/destructors: > * Per-thread constructors are functions called as the first thing from newly > created threads. This can be used to initialize variables in thread-local > storage, variables depending on the (tid_t) thread id, and to call setup > functions that must be called from the thread itself. > * Per-thread destructors are functions called (from the thread) as the last > thing when a thread ends. > > At this time, I am seeking feedback on the concept and the proposed > limitations. > This seems like a useful mechanism, that would simplify things for libraries and PMDs that have dynamically allocated per-thread resources. It's unclear to me, how many such modules there are. > Processes have __attribute__(constructor/destructor) to set up functions to > be called before main(). Nothing similar exists for threads, so we have to > design it ourselves. > > The proposed per-thread constructors/destructors should not apply to all > threads - only to threads created through the DPDK threads API. Agree? > > DPDK has the RTE_INIT()/RTE_FINI() macros for adding process constructors/destructors at build time, so I propose a similar API, i.e. adding RTE_THREAD_INIT() and RTE_THREAD_FINI() macros to build a list of per-thread constructors and destructors at build time. > > Two functions that call the list of per-thread constructors respectively > destructors will be introduced. > > The per-thread constructor function will be called from the newly created > threads within DPDK: > 1. EAL/SERVICE threads: In eal_thread_loop() before the loop. > 2. CTRL threads: In ctrl_thread_init() before start_routine(). > 3. Non-EAL threads: In rte_thread_register(). > > Are any thread types missing in the list above? > Maybe all unregistered non-EAL threads should be excluded, including such created via rte_ctrl_thread_create(). In that case, you would have: 1. EAL threads (both the main and the worker lcores, and both lcores employed as service and non-service lcores). 2. Registered non-EAL threads (created using rte_ctrl_thread_create() or any other means, but having in common having called rte_thread_register()). Another thing which is unclear to me is if libraries/PMDs tend to want to differentiate between EAL threads and registered non-EAL threads. EAL threads are generally required to be safe from preemption and also generally do fast path/packet processing kind of work, while what registered non-EAL threads tend to do (and thus what kind of API access they require) is more open. Another thing to consider is how this mechanism should work in regards to scaling up/down a DPDK application. When an EAL thread is taken out of use, should the finalize function be called? It might complicate things, but on the other hand would allow libraries and PMDs to free resources no longer required. > > The per-thread destructor function will also be registered by the per-thread > constructor, using the POSIX pthread_cleanup_push() function. 
> > > Examples of where per-thread constructors are useful: > PRNG state initialization [1]: > http://inbox.dpdk.org/dev/2a5121c7-369f-afde-0898-d45a5b368c3a@ericsson.com/ > PMU event setup [2]: > http://inbox.dpdk.org/dev/a059c403-8c6c-79c3-c3e9-a8c1815f5a14@ericsson.com/T/#m3d29fbc301059f007425ac148a4113e66d2ef983 > > > Please also refer to the discussion [3] about lcore init callbacks for > further thoughts. Especially the de-facto inability to establish > constructors/destructors at runtime. > > [3]: > http://inbox.dpdk.org/dev/98CBD80474FA8B44BF855DF32C47DC35D875B6@smartserver.smartshare.dk/T/#m23b9a930a4050dc6b0305d3653754bd152c09ab7 > > > Med venlig hilsen / Kind regards, > -Morten Brørup
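For concreteness, one possible shape of the proposed macros (purely illustrative; none of these names exist in DPDK today):

#include <rte_common.h>

/* Each constructor registers itself in a process-wide list at load
 * time via RTE_INIT(); every DPDK-created thread would then walk the
 * list on entry (destructors would be handled analogously on exit). */
struct rte_thread_ctor {
	void (*func)(void);
	struct rte_thread_ctor *next;
};

static struct rte_thread_ctor *rte_thread_ctor_head;

#define RTE_THREAD_INIT(fn)					\
static void fn(void);						\
RTE_INIT(rte_thread_ctor_reg_##fn)				\
{								\
	static struct rte_thread_ctor ctor = { .func = fn };	\
	ctor.next = rte_thread_ctor_head;			\
	rte_thread_ctor_head = &ctor;				\
}								\
static void fn(void)

/* Would be called from eal_thread_loop(), ctrl_thread_init() and
 * rte_thread_register(), per the list above. */
static inline void
rte_thread_run_ctors(void)
{
	struct rte_thread_ctor *c;

	for (c = rte_thread_ctor_head; c != NULL; c = c->next)
		c->func();
}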
Re: C++20 report error at file rte_spinlock.h
On Tue, Dec 13, 2022 at 06:11:06AM +, Zhou, Xiangyun wrote: > Dear dpdk dev, > > I'm using dpdk 21.11 LTS, when compile my program with CPP flag "-std=c++20", > the compiler report below errors. After checking file rte_spinlock.h, I think > the error report by compiler is valid, there should be a potential issue when > using functions rte_spinlock_recursive_lock, rte_spinlock_recursive_unlock > and rte_spinlock_recursive_trylock in multi-thread, we could either remove > "volatile" definition to ask users to handle the multi-thread issue, or using > atomic operatings instead of self-increment and self-decrement. > > > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:221:12: error: increment of > object of volatile-qualified type 'volatile int' is deprecated > [-Werror,-Wdeprecated-volatile] > slr->count++; > ^ > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:231:6: error: decrement of > object of volatile-qualified type 'volatile int' is deprecated > [-Werror,-Wdeprecated-volatile] > if (--(slr->count) == 0) { > ^ > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:255:12: error: increment of > object of volatile-qualified type 'volatile int' is deprecated > [-Werror,-Wdeprecated-volatile] > slr->count++; > i have work in progress to optionally use standard atomics but in the meantime the correct thing to do here is to use the gcc builtins that match the requirements of the c++11 memory model. the code should be converted to use __atomic_fetch_{add,sub} or __atomic_{add,sub}_fetch as appropriate. ty.
RE: C++20 report error at file rte_spinlock.h
> On Tue, Dec 13, 2022 at 06:11:06AM +, Zhou, Xiangyun wrote: > > Dear dpdk dev, > > > > I'm using dpdk 21.11 LTS, when compile my program with CPP flag > > "-std=c++20", the compiler report below errors. After checking file > rte_spinlock.h, I think the error report by compiler is valid, there should be a potential issue when using functions > rte_spinlock_recursive_lock, rte_spinlock_recursive_unlock and > rte_spinlock_recursive_trylock in multi-thread, we could either > remove "volatile" definition to ask users to handle the multi-thread issue, or using atomic operatings instead of self-increment and > self-decrement. > > > > > > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:221:12: error: increment > > of object of volatile-qualified type 'volatile int' is > deprecated [-Werror,-Wdeprecated-volatile] > > slr->count++; > > ^ > > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:231:6: error: decrement > > of object of volatile-qualified type 'volatile int' is > deprecated [-Werror,-Wdeprecated-volatile] > > if (--(slr->count) == 0) { > > ^ > > /home/dpdk/lib/eal/include/generic/rte_spinlock.h:255:12: error: increment > > of object of volatile-qualified type 'volatile int' is > deprecated [-Werror,-Wdeprecated-volatile] > > slr->count++; > > > > i have work in progress to optionally use standard atomics but in the > meantime the correct thing to do here is to use the gcc builtins that > match the requirements of the c++11 memory model. > > the code should be converted to use __atomic_fetch_{add,sub} or > __atomic_{add,sub}_fetch as appropriate. > > ty. From looking at the code, I don't think it is necessary: both 'user' and 'count' are supposed to be protected by 'sl'. In fact, it looks safe just to remove 'volatile' qualifier here.
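For illustration, the first suggestion applied to the recursive lock, as a sketch (assuming the 'volatile' qualifier is dropped from the count field; with the second suggestion the body would simply keep the plain increment):

#include <rte_eal.h>		/* rte_gettid() */
#include <rte_spinlock.h>

static inline void
spinlock_recursive_lock_sketch(rte_spinlock_recursive_t *slr)
{
	int id = rte_gettid();

	if (slr->user != id) {
		rte_spinlock_lock(&slr->sl);
		slr->user = id;
	}
	/* gcc builtin matching the C++11 memory model, as suggested. */
	__atomic_fetch_add(&slr->count, 1, __ATOMIC_RELAXED);
}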
RE: [PATCH v4 4/4] eventdev/timer: change eventdev reconfig logic
Hi Harish, Adding a couple of comments inline: > -Original Message- > From: Naga Harish K, S V > Sent: Monday, December 19, 2022 12:29 AM > To: jer...@marvell.com; Carrillo, Erik G ; Gujjar, > Abhinandan S > Cc: dev@dpdk.org; Jayatheerthan, Jay > Subject: [PATCH v4 4/4] eventdev/timer: change eventdev reconfig logic > > When rte_event_timer_adapter_create() is used for creating adapter > instance, eventdev is reconfigured with additional > ``rte_event_dev_config::nb_event_ports`` parameter. > > This eventdev reconfig logic is enhanced to increment the > ``rte_event_dev_config::nb_single_link_event_port_queues`` > parameter if the adapter event port config is of type > ``RTE_EVENT_PORT_CFG_SINGLE_LINK``. > > With this change the application is no longer need to configure the eventdev > with ``rte_event_dev_config::nb_single_link_event_port_queues`` > parameter required for timer adapter when the adapter is created using > above mentioned api. > > Signed-off-by: Naga Harish K S V > Acked-by: Abhinandan Gujjar > --- > v2: > * fix build error in documentation > v3: > * update doxygen > v4: > * fix programmer guide > --- > --- > doc/guides/prog_guide/event_timer_adapter.rst | 17 ++ > lib/eventdev/rte_event_timer_adapter.c| 23 +++ > lib/eventdev/rte_event_timer_adapter.h| 13 +++ > 3 files changed, 43 insertions(+), 10 deletions(-) > > diff --git a/doc/guides/prog_guide/event_timer_adapter.rst > b/doc/guides/prog_guide/event_timer_adapter.rst > index d7307a29bb..b457c879b0 100644 > --- a/doc/guides/prog_guide/event_timer_adapter.rst > +++ b/doc/guides/prog_guide/event_timer_adapter.rst > @@ -139,6 +139,23 @@ This function is passed a callback function that will be > invoked if the adapter needs to create an event port, giving the application > the opportunity to control how it is done. > > +Event device configuration for service based adapter > + We can use '^' instead of '~' here to make it a subsection. > + > +When rte_event_timer_adapter_create() is used for creating adapter > +instance, eventdev is reconfigured with additional > +``rte_event_dev_config::nb_event_ports`` parameter. How about something along the lines of: "When rte_event_timer_adapter_create() is used to create an adapter instance, ``rte_event_dev_config::nb_event_ports`` is automatically incremented, and the eventdev is reconfigured with the additional port." > +This eventdev reconfig logic also increment the "increments" > +``rte_event_dev_config::nb_single_link_event_port_queues`` > +parameter if the adapter event port config is of type > +``RTE_EVENT_PORT_CFG_SINGLE_LINK``. > + > +So the application is no longer need to configure the event device with "application no longer needs" > +``rte_event_dev_config::nb_event_ports`` and > +``rte_event_dev_config::nb_single_link_event_port_queues`` > +parameters required for timer adapter when the adapter is created using > +above mentioned api. 
> + > Adapter modes > ^ > An event timer adapter can be configured in either periodic or non-periodic > mode diff --git a/lib/eventdev/rte_event_timer_adapter.c > b/lib/eventdev/rte_event_timer_adapter.c > index a0f14bf861..5ed233db00 100644 > --- a/lib/eventdev/rte_event_timer_adapter.c > +++ b/lib/eventdev/rte_event_timer_adapter.c > @@ -88,7 +88,20 @@ default_port_conf_cb(uint16_t id, uint8_t > event_dev_id, uint8_t *event_port_id, > rte_event_dev_stop(dev_id); > > port_id = dev_conf.nb_event_ports; > + if (conf_arg != NULL) > + port_conf = conf_arg; > + else { > + port_conf = &def_port_conf; > + ret = rte_event_port_default_conf_get(dev_id, port_id, > + port_conf); > + if (ret < 0) > + return ret; > + } > + > dev_conf.nb_event_ports += 1; > + if (port_conf->event_port_cfg & > RTE_EVENT_PORT_CFG_SINGLE_LINK) > + dev_conf.nb_single_link_event_port_queues += 1; > + > ret = rte_event_dev_configure(dev_id, &dev_conf); > if (ret < 0) { > EVTIM_LOG_ERR("failed to configure event dev %u\n", > dev_id); @@ -99,16 +112,6 @@ default_port_conf_cb(uint16_t id, uint8_t > event_dev_id, uint8_t *event_port_id, > return ret; > } > > - if (conf_arg != NULL) > - port_conf = conf_arg; > - else { > - port_conf = &def_port_conf; > - ret = rte_event_port_default_conf_get(dev_id, port_id, > - port_conf); > - if (ret < 0) > - return ret; > - } > - > ret = rte_event_port_setup(dev_id, port_id, port_conf); > if (ret < 0) { > EVTIM_LOG_ERR("failed to setup event port %u on event > dev %u\n", diff --git a/lib/eventdev/rte_event_timer_adapter.h > b/lib/eventdev/r
Re: [PATCH] net/gve: add support for basic stats
Hi all Josh just found out some inconsistencies in the Tx/Rx statistics sum for all ports. Not sure if we can screenshot here but it goes like this: Tx-dropped for port0: 447034 Tx-dropped for port1: 0 Accumulated forward statistics for all ports: 807630 Please note that this issue is only with Tx-dropped (not Tx-packets/Tx-total). On Wed, Dec 7, 2022 at 8:39 AM Stephen Hemminger wrote: > > On Wed, 7 Dec 2022 15:09:08 + > Ferruh Yigit wrote: > > > On 11/24/2022 7:33 AM, Junfeng Guo wrote: > > > Add support for dev_ops of stats_get and stats_reset. > > > > > > Queue stats update will be moved into xstat [1], but the basic stats > > > items may still be required. So we just keep the remaining ones and > > > will implement the queue stats via xstats in the coming release. > > > > > > [1] > > > https://elixir.bootlin.com/dpdk/v22.07/ \ > > > source/doc/guides/rel_notes/deprecation.rst#L118 > > > > > > Signed-off-by: Xiaoyun Li > > > Signed-off-by: Junfeng Guo > > > > <...> > > > > > +static int > > > +gve_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) > > > +{ > > > + uint16_t i; > > > + > > > + for (i = 0; i < dev->data->nb_tx_queues; i++) { > > > + struct gve_tx_queue *txq = dev->data->tx_queues[i]; > > > + if (txq == NULL) > > > + continue; > > > + > > > + stats->opackets += txq->packets; > > > + stats->obytes += txq->bytes; > > > + stats->oerrors += txq->errors; > > > > Hi Junfeng, Qi, Jingjing, Beilei, > > > > Above logic looks wrong to me, did you test it? > > > > If the 'gve_dev_stats_get()' called multiple times (without stats reset > > in between), same values will be keep added to stats. > > Some hw based implementations does this, because reading from stats > > registers automatically reset those registers but this shouldn't be case > > for this driver. > > > > I expect it to be something like: > > > > local_stats = 0 > > foreach queue > > local_stats += queue->stats > > stats = local_stats > > The zero of local_stats is unnecessary. > The only caller of the PMD stats_get is rte_ethdev_stats_get > and it zeros the stats structure before calling the PMD. > > > int > rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) > { > struct rte_eth_dev *dev; > > RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); > dev = &rte_eth_devices[port_id]; > > memset(stats, 0, sizeof(*stats)); > ... > stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; > return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); > > If any PMD has extra memset in their stats get that could be removed.
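To make the accumulation Ferruh describes concrete, a hedged sketch against the driver-internal types shown in the patch (Tx side only; Rx is analogous). Whether to keep the local zeroing is exactly the point Stephen raises; it is redundant given the memset in rte_eth_stats_get(), but makes the PMD independent of the caller:

static int
gve_dev_stats_get_sketch(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct rte_eth_stats local = { 0 };
	uint16_t i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		const struct gve_tx_queue *txq = dev->data->tx_queues[i];

		if (txq == NULL)
			continue;
		local.opackets += txq->packets;
		local.obytes += txq->bytes;
		local.oerrors += txq->errors;
	}

	*stats = local;
	return 0;
}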
[PATCH v2] eventdev/timer: add API to get remaining ticks
Introduce an event timer adapter API which allows users to determine how many adapter ticks remain until an event timer fires. Signed-off-by: Erik Gabriel Carrillo --- v2: * Rename API to rte_event_timer_get_remaining_ticks * Assert that API out param is non-NULL instead of checking and returning error app/test/test_event_timer_adapter.c| 68 ++ lib/eventdev/event_timer_adapter_pmd.h | 7 +++ lib/eventdev/rte_event_timer_adapter.c | 52 lib/eventdev/rte_event_timer_adapter.h | 27 ++ lib/eventdev/version.map | 3 ++ 5 files changed, 157 insertions(+) diff --git a/app/test/test_event_timer_adapter.c b/app/test/test_event_timer_adapter.c index 1a440dfd10..1a1fb48b24 100644 --- a/app/test/test_event_timer_adapter.c +++ b/app/test/test_event_timer_adapter.c @@ -1920,6 +1920,72 @@ adapter_create_max(void) return TEST_SUCCESS; } +static inline int +test_timer_ticks_remaining(void) +{ + uint64_t ticks_remaining = UINT64_MAX; + struct rte_event_timer *ev_tim; + struct rte_event ev; + int ret, i; + const struct rte_event_timer tim = { + .ev.op = RTE_EVENT_OP_NEW, + .ev.queue_id = 0, + .ev.sched_type = RTE_SCHED_TYPE_ATOMIC, + .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .ev.event_type = RTE_EVENT_TYPE_TIMER, + .state = RTE_EVENT_TIMER_NOT_ARMED, + }; + + rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim); + *ev_tim = tim; + ev_tim->ev.event_ptr = ev_tim; +#define TEST_TICKS 5 + ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS); + + /* Test that unarmed timer returns error */ + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim, +&ticks_remaining), +"Didn't fail to get ticks for unarmed event timer"); + + TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev, &ev_tim, 1), 1, + "Failed to arm timer with proper timeout."); + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED, + "Improper timer state set expected %d returned %d", + RTE_EVENT_TIMER_ARMED, ev_tim->state); + + for (i = 0; i < TEST_TICKS; i++) { + ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim, + &ticks_remaining); + if (ret < 0) + return TEST_FAILED; + + TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i, + "Expected %d ticks remaining, got %"PRIu64"", + TEST_TICKS - i, ticks_remaining); + + rte_delay_ms(100); + } + + rte_delay_ms(100); + + TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1, 0), 1, + "Armed timer failed to trigger."); + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_NOT_ARMED, + "Improper timer state set expected %d returned %d", + RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state); + + /* Test that timer that fired returns error */ + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim, + &ticks_remaining), +"Didn't fail to get ticks for unarmed event timer"); + + rte_mempool_put(eventdev_test_mempool, (void *)ev_tim); + +#undef TEST_TICKS + return TEST_SUCCESS; +} + + static struct unit_test_suite event_timer_adptr_functional_testsuite = { .suite_name = "event timer functional test suite", .setup = testsuite_setup, @@ -1982,6 +2048,8 @@ static struct unit_test_suite event_timer_adptr_functional_testsuite = { TEST_CASE_ST(timdev_setup_msec, timdev_teardown, adapter_tick_resolution), TEST_CASE(adapter_create_max), + TEST_CASE_ST(timdev_setup_msec, timdev_teardown, + test_timer_ticks_remaining), TEST_CASES_END() /**< NULL terminate unit test array */ } }; diff --git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h index 189017b5c1..7ba9df463b 100644 --- a/lib/eventdev/event_timer_adapter_pmd.h +++ b/lib/eventdev/event_timer_adapter_pmd.h @@ 
-52,6 +52,11 @@ typedef int (*rte_event_timer_adapter_stats_get_t)( typedef int (*rte_event_timer_adapter_stats_reset_t)( const struct rte_event_timer_adapter *adapter); /**< @internal Reset statistics for event timer adapter */ +typedef int (*rte_event_timer_get_remaining_ticks_t)( + const struct rte_event_timer_adapter *adapter, + const struct rte_event_timer *evtim, + uint64_t *ticks_remaining); +/*
RE: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks
> -Original Message- > From: Erik Gabriel Carrillo > Sent: Tuesday, December 20, 2022 1:29 AM > To: Jerin Jacob Kollanukkaran > Cc: hof...@lysator.liu.se; s.v.naga.haris...@intel.com; > jay.jayatheert...@intel.com; Pavan Nikhilesh Bhagavatula > ; Shijith Thotton ; > dev@dpdk.org > Subject: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks > > External Email > > -- > Introduce an event timer adapter API which allows users to determine how > many adapter ticks remain until an event timer fires. > > Signed-off-by: Erik Gabriel Carrillo > --- > v2: > * Rename API to rte_event_timer_get_remaining_ticks > * Assert that API out param is non-NULL instead of checking and returning > error > > app/test/test_event_timer_adapter.c| 68 > ++ > lib/eventdev/event_timer_adapter_pmd.h | 7 +++ > lib/eventdev/rte_event_timer_adapter.c | 52 > lib/eventdev/rte_event_timer_adapter.h | 27 ++ > lib/eventdev/version.map | 3 ++ > 5 files changed, 157 insertions(+) > > diff --git a/app/test/test_event_timer_adapter.c > b/app/test/test_event_timer_adapter.c > index 1a440dfd10..1a1fb48b24 100644 > --- a/app/test/test_event_timer_adapter.c > +++ b/app/test/test_event_timer_adapter.c > @@ -1920,6 +1920,72 @@ adapter_create_max(void) > return TEST_SUCCESS; > } > > +static inline int > +test_timer_ticks_remaining(void) > +{ > + uint64_t ticks_remaining = UINT64_MAX; > + struct rte_event_timer *ev_tim; > + struct rte_event ev; > + int ret, i; > + const struct rte_event_timer tim = { > + .ev.op = RTE_EVENT_OP_NEW, > + .ev.queue_id = 0, > + .ev.sched_type = RTE_SCHED_TYPE_ATOMIC, > + .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL, > + .ev.event_type = RTE_EVENT_TYPE_TIMER, > + .state = RTE_EVENT_TIMER_NOT_ARMED, > + }; > + > + rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim); > + *ev_tim = tim; > + ev_tim->ev.event_ptr = ev_tim; > +#define TEST_TICKS 5 > + ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS); > + > + /* Test that unarmed timer returns error */ > + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, > ev_tim, > + &ticks_remaining), > + "Didn't fail to get ticks for unarmed event timer"); Please handle ENOSUP case. 
> + > + TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev, > &ev_tim, 1), 1, > + "Failed to arm timer with proper timeout."); > + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED, > + "Improper timer state set expected %d returned > %d", > + RTE_EVENT_TIMER_ARMED, ev_tim->state); > + > + for (i = 0; i < TEST_TICKS; i++) { > + ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim, > + &ticks_remaining); > + if (ret < 0) > + return TEST_FAILED; > + > + TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i, > + "Expected %d ticks remaining, got > %"PRIu64"", > + TEST_TICKS - i, ticks_remaining); > + > + rte_delay_ms(100); > + } > + > + rte_delay_ms(100); > + > + TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1, > 0), 1, > + "Armed timer failed to trigger."); > + TEST_ASSERT_EQUAL(ev_tim->state, > RTE_EVENT_TIMER_NOT_ARMED, > + "Improper timer state set expected %d returned > %d", > + RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state); > + > + /* Test that timer that fired returns error */ > + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, > ev_tim, > +&ticks_remaining), > + "Didn't fail to get ticks for unarmed event timer"); > + > + rte_mempool_put(eventdev_test_mempool, (void *)ev_tim); > + > +#undef TEST_TICKS > + return TEST_SUCCESS; > +} > + > + > static struct unit_test_suite event_timer_adptr_functional_testsuite = { > .suite_name = "event timer functional test suite", > .setup = testsuite_setup, > @@ -1982,6 +2048,8 @@ static struct unit_test_suite > event_timer_adptr_functional_testsuite = { > TEST_CASE_ST(timdev_setup_msec, timdev_teardown, > adapter_tick_resolution), > TEST_CASE(adapter_create_max), > + TEST_CASE_ST(timdev_setup_msec, timdev_teardown, > + test_timer_ticks_remaining), > TEST_CASES_END() /**< NULL terminate unit test array */ > } > }; > diff --git a/lib/eventdev/event_timer_adapter_pmd.h > b/lib/eventdev/event_timer_a
[PATCH v3] eventdev/timer: add API to get remaining ticks
Introduce an event timer adapter API which allows users to determine how many adapter ticks remain until an event timer fires. Signed-off-by: Erik Gabriel Carrillo --- v3: * Handle ENOTSUP case in unit test v2: * Rename API to rte_event_timer_get_remaining_ticks * Assert that API out param is non-NULL instead of checking and returning error app/test/test_event_timer_adapter.c| 75 ++ lib/eventdev/event_timer_adapter_pmd.h | 7 +++ lib/eventdev/rte_event_timer_adapter.c | 52 ++ lib/eventdev/rte_event_timer_adapter.h | 27 ++ lib/eventdev/version.map | 3 ++ 5 files changed, 164 insertions(+) diff --git a/app/test/test_event_timer_adapter.c b/app/test/test_event_timer_adapter.c index 1a440dfd10..6241a70597 100644 --- a/app/test/test_event_timer_adapter.c +++ b/app/test/test_event_timer_adapter.c @@ -1920,6 +1920,79 @@ adapter_create_max(void) return TEST_SUCCESS; } +static inline int +test_timer_ticks_remaining(void) +{ + uint64_t ticks_remaining = UINT64_MAX; + struct rte_event_timer *ev_tim; + struct rte_event ev; + int ret, i; + const struct rte_event_timer tim = { + .ev.op = RTE_EVENT_OP_NEW, + .ev.queue_id = 0, + .ev.sched_type = RTE_SCHED_TYPE_ATOMIC, + .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .ev.event_type = RTE_EVENT_TYPE_TIMER, + .state = RTE_EVENT_TIMER_NOT_ARMED, + }; + + rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim); + *ev_tim = tim; + ev_tim->ev.event_ptr = ev_tim; +#define TEST_TICKS 5 + ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS); + + ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim, + &ticks_remaining); + if (ret == -ENOTSUP) { + rte_mempool_put(eventdev_test_mempool, (void *)ev_tim); + printf("API not supported, skipping test\n"); + return TEST_SKIPPED; + } + + /* Test that unarmed timer returns error */ + TEST_ASSERT_FAIL(ret, +"Didn't fail to get ticks for unarmed event timer"); + + TEST_ASSERT_EQUAL(rte_event_timer_arm_burst(timdev, &ev_tim, 1), 1, + "Failed to arm timer with proper timeout."); + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_ARMED, + "Improper timer state set expected %d returned %d", + RTE_EVENT_TIMER_ARMED, ev_tim->state); + + for (i = 0; i < TEST_TICKS; i++) { + ret = rte_event_timer_get_remaining_ticks(timdev, ev_tim, + &ticks_remaining); + if (ret < 0) + return TEST_FAILED; + + TEST_ASSERT_EQUAL((int)ticks_remaining, TEST_TICKS - i, + "Expected %d ticks remaining, got %"PRIu64"", + TEST_TICKS - i, ticks_remaining); + + rte_delay_ms(100); + } + + rte_delay_ms(100); + + TEST_ASSERT_EQUAL(rte_event_dequeue_burst(evdev, 0, &ev, 1, 0), 1, + "Armed timer failed to trigger."); + TEST_ASSERT_EQUAL(ev_tim->state, RTE_EVENT_TIMER_NOT_ARMED, + "Improper timer state set expected %d returned %d", + RTE_EVENT_TIMER_NOT_ARMED, ev_tim->state); + + /* Test that timer that fired returns error */ + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, ev_tim, + &ticks_remaining), +"Didn't fail to get ticks for unarmed event timer"); + + rte_mempool_put(eventdev_test_mempool, (void *)ev_tim); + +#undef TEST_TICKS + return TEST_SUCCESS; +} + + static struct unit_test_suite event_timer_adptr_functional_testsuite = { .suite_name = "event timer functional test suite", .setup = testsuite_setup, @@ -1982,6 +2055,8 @@ static struct unit_test_suite event_timer_adptr_functional_testsuite = { TEST_CASE_ST(timdev_setup_msec, timdev_teardown, adapter_tick_resolution), TEST_CASE(adapter_create_max), + TEST_CASE_ST(timdev_setup_msec, timdev_teardown, + test_timer_ticks_remaining), TEST_CASES_END() /**< NULL terminate unit test array */ } }; diff 
--git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h index 189017b5c1..7ba9df463b 100644 --- a/lib/eventdev/event_timer_adapter_pmd.h +++ b/lib/eventdev/event_timer_adapter_pmd.h @@ -52,6 +52,11 @@ typedef int (*rte_event_timer_adapter_stats_get_t)( typedef int (*rte_event_timer_adapter_stats_reset_t)( const struct rte_event_timer_adapter *adapter); /**< @internal Re
RE: [EXT] [PATCH v2] eventdev/timer: add API to get remaining ticks
<... snipped ...> > > diff --git a/app/test/test_event_timer_adapter.c > > b/app/test/test_event_timer_adapter.c > > index 1a440dfd10..1a1fb48b24 100644 > > --- a/app/test/test_event_timer_adapter.c > > +++ b/app/test/test_event_timer_adapter.c > > @@ -1920,6 +1920,72 @@ adapter_create_max(void) > > return TEST_SUCCESS; > > } > > > > +static inline int > > +test_timer_ticks_remaining(void) > > +{ > > + uint64_t ticks_remaining = UINT64_MAX; > > + struct rte_event_timer *ev_tim; > > + struct rte_event ev; > > + int ret, i; > > + const struct rte_event_timer tim = { > > + .ev.op = RTE_EVENT_OP_NEW, > > + .ev.queue_id = 0, > > + .ev.sched_type = RTE_SCHED_TYPE_ATOMIC, > > + .ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL, > > + .ev.event_type = RTE_EVENT_TYPE_TIMER, > > + .state = RTE_EVENT_TIMER_NOT_ARMED, > > + }; > > + > > + rte_mempool_get(eventdev_test_mempool, (void **)&ev_tim); > > + *ev_tim = tim; > > + ev_tim->ev.event_ptr = ev_tim; > > +#define TEST_TICKS 5 > > + ev_tim->timeout_ticks = CALC_TICKS(TEST_TICKS); > > + > > + /* Test that unarmed timer returns error */ > > + TEST_ASSERT_FAIL(rte_event_timer_get_remaining_ticks(timdev, > > ev_tim, > > +&ticks_remaining), > > +"Didn't fail to get ticks for unarmed event timer"); > > Please handle ENOSUP case. > Good catch - updated in v3. Thanks, Erik <... snipped ...>
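For reference, a minimal caller-side sketch of the new (experimental) API, assuming 'adapter' and an armed 'ev_tim' set up as in the unit test above:

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <rte_event_timer_adapter.h>

static void
print_remaining(const struct rte_event_timer_adapter *adapter,
		const struct rte_event_timer *ev_tim)
{
	uint64_t ticks;
	int ret = rte_event_timer_get_remaining_ticks(adapter, ev_tim, &ticks);

	if (ret == -ENOTSUP)
		printf("driver does not implement this op\n");
	else if (ret < 0)
		printf("timer not armed or already fired (%d)\n", ret);
	else
		printf("%" PRIu64 " adapter ticks until expiry\n", ticks);
}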
[PATCH] net/iavf: add lock for vf commands
iavf admin queue commands aren't thread-safe. Bugs surrounding this issue can manifest in a variety of ways but frequently pend_cmd is overwritten. Simultaneously executing commands can result in a misconfigured device or DPDK sleeping in a thread for 2 seconds. Despite this limitation, vf commands may be executed from both iavf_dev_alarm_handler() in a control thread and the application's main thread. This is trivial to simulate in the testpmd application by creating a bond of VFs in active backup mode, and then turning the bond off and then on again repeatedly. Previously [1] was proposed as a potential solution, but this commit has been reverted. I propose adding locks until a more complete solution is available. [1] commit cb5c1b91f76f ("net/iavf: add thread for event callbacks") Fixes: 48de41ca11f0 ("net/avf: enable link status update") Fixes: 84108425054a ("net/iavf: support asynchronous virtual channel message") Cc: sta...@dpdk.org Signed-off-by: Mike Pattrick --- drivers/net/iavf/iavf.h | 1 + drivers/net/iavf/iavf_vchnl.c | 76 +++ 2 files changed, 77 insertions(+) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 1edebab8dc..aa18650ffa 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -262,6 +262,7 @@ struct iavf_info { struct iavf_qv_map *qv_map; /* queue vector mapping */ struct iavf_flow_list flow_list; rte_spinlock_t flow_ops_lock; + rte_spinlock_t aq_lock; struct iavf_parser_list rss_parser_list; struct iavf_parser_list dist_parser_list; struct iavf_parser_list ipsec_crypto_parser_list; diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c index f92daf97f2..ca435dee2e 100644 --- a/drivers/net/iavf/iavf_vchnl.c +++ b/drivers/net/iavf/iavf_vchnl.c @@ -554,7 +554,9 @@ iavf_enable_vlan_strip(struct iavf_adapter *adapter) args.in_args_size = 0; args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); ret = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of" " OP_ENABLE_VLAN_STRIPPING"); @@ -575,7 +577,9 @@ iavf_disable_vlan_strip(struct iavf_adapter *adapter) args.in_args_size = 0; args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); ret = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of" " OP_DISABLE_VLAN_STRIPPING"); @@ -604,7 +608,9 @@ iavf_check_api_version(struct iavf_adapter *adapter) args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); err = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (err) { PMD_INIT_LOG(ERR, "Fail to execute command of OP_VERSION"); return err; } @@ -665,7 +671,9 @@ iavf_get_vf_resource(struct iavf_adapter *adapter) args.in_args = (uint8_t *)&caps; args.in_args_size = sizeof(caps); + rte_spinlock_lock(&vf->aq_lock); err = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (err) { PMD_DRV_LOG(ERR, @@ -710,7 +718,9 @@ iavf_get_supported_rxdid(struct iavf_adapter *adapter) args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); ret = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (ret) { PMD_DRV_LOG(ERR, "Failed to execute command of OP_GET_SUPPORTED_RXDIDS"); @@ -754,7 +764,9 @@ iavf_config_vlan_strip_v2(struct iavf_adapter *adapter, bool enable) args.in_args_size = 
sizeof(vlan_strip); args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); ret = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (ret) PMD_DRV_LOG(ERR, "fail to execute command %s", enable ? "VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2" : @@ -794,7 +806,9 @@ iavf_config_vlan_insert_v2(struct iavf_adapter *adapter, bool enable) args.in_args_size = sizeof(vlan_insert); args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; + rte_spinlock_lock(&vf->aq_lock); ret = iavf_execute_vf_cmd(adapter, &args, 0); + rte_spinlock_unlock(&vf->aq_lock); if (ret) PMD_DRV_LOG(ERR, "fail to execute command %s", enable ? "VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2" : @@ -837,
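A possible follow-up cleanup (hypothetical, not part of this patch) would centralize the locking in one wrapper instead of repeating the lock/unlock pair at every call site:

/* Hypothetical helper inside iavf_vchnl.c; iavf_execute_vf_cmd() and
 * the new aq_lock field are as introduced in the patch above. */
static int
iavf_execute_vf_cmd_safe(struct iavf_adapter *adapter,
		struct iavf_cmd_info *args, int async)
{
	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
	int ret;

	rte_spinlock_lock(&vf->aq_lock);
	ret = iavf_execute_vf_cmd(adapter, args, async);
	rte_spinlock_unlock(&vf->aq_lock);

	return ret;
}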
[PATCH 0/3] Async vhost packed ring optimization
To improve the performance of the async vhost packed ring, this series removes an unnecessary data copy in the async vhost packed ring and adds a batch data path to both the enqueue and dequeue paths. Cheng Jiang (3): vhost: remove redundant copy for packed shadow used ring vhost: add batch enqueue in async vhost packed ring vhost: add batch dequeue in async vhost packed ring lib/vhost/virtio_net.c | 393 + 1 file changed, 355 insertions(+), 38 deletions(-) -- 2.35.1
[PATCH 1/3] vhost: remove redundant copy for packed shadow used ring
In the packed ring enqueue data path of the current asynchronous Vhost design, the shadow used ring is first copied to the sync shadow used ring, and then it will be moved to the async shadow used ring for some historical reasons. This is completely unnecessary. This patch removes redundant copy for the shadow used ring. The async shadow used ring will be updated directly. Signed-off-by: Cheng Jiang --- lib/vhost/virtio_net.c | 66 -- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 9abf752f30..7c3ec128a0 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -572,6 +572,26 @@ vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, } } +static __rte_always_inline void +vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq, + uint32_t *len, + uint16_t *id, + uint16_t *count, + uint16_t num_buffers) +{ + uint16_t i; + struct vhost_async *async = vq->async; + + for (i = 0; i < num_buffers; i++) { + async->buffers_packed[async->buffer_idx_packed].id = id[i]; + async->buffers_packed[async->buffer_idx_packed].len = len[i]; + async->buffers_packed[async->buffer_idx_packed].count = count[i]; + async->buffer_idx_packed++; + if (async->buffer_idx_packed >= vq->size) + async->buffer_idx_packed -= vq->size; + } +} + static __rte_always_inline void vhost_shadow_enqueue_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -1647,23 +1667,6 @@ store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem } } -static __rte_always_inline void -store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, - struct vring_used_elem_packed *d_ring, - uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) -{ - size_t elem_size = sizeof(struct vring_used_elem_packed); - - if (d_idx + count <= ring_size) { - rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); - } else { - uint16_t size = ring_size - d_idx; - - rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); - rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); - } -} - static __rte_noinline uint32_t virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) @@ -1822,7 +1825,8 @@ vhost_enqueue_async_packed(struct virtio_net *dev, if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) return -1; - vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); + vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, + buffer_desc_count, *nr_buffers); return 0; } @@ -1852,6 +1856,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, { uint16_t descs_err = 0; uint16_t buffers_err = 0; + struct vhost_async *async = vq->async; struct async_inflight_info *pkts_info = vq->async->pkts_info; *pkt_idx -= nr_err; @@ -1869,7 +1874,10 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, vq->avail_wrap_counter ^= 1; } - vq->shadow_used_idx -= buffers_err; + if (async->buffer_idx_packed >= buffers_err) + async->buffer_idx_packed -= buffers_err; + else + async->buffer_idx_packed = async->buffer_idx_packed + vq->size - buffers_err; } static __rte_noinline uint32_t @@ -1921,23 +1929,11 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); } - if (likely(vq->shadow_used_idx)) { - /* keep used descriptors. 
*/ - store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed, - vq->size, 0, async->buffer_idx_packed, - vq->shadow_used_idx); - - async->buffer_idx_packed += vq->shadow_used_idx; - if (async->buffer_idx_packed >= vq->size) - async->buffer_idx_packed -= vq->size; - - async->pkts_idx += pkt_idx; - if (async->pkts_idx >= vq->size) - async->pkts_idx -= vq->size; + async->pkts_idx += pkt_idx; + if (async->pkts_idx >= vq->size) + async->pkts_idx -= vq->size; - vq->sh
[PATCH 2/3] vhost: add batch enqueue in async vhost packed ring
Add batch enqueue function in asynchronous vhost packed ring to improve the performance. Chained mbufs are not supported, it will be handled in single enqueue function. Signed-off-by: Cheng Jiang --- lib/vhost/virtio_net.c | 157 + 1 file changed, 157 insertions(+) diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 7c3ec128a0..ac8c404327 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -432,6 +432,24 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev, vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); } +static __rte_always_inline void +vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq, +uint64_t *lens, +uint16_t *ids) +{ + uint16_t i; + struct vhost_async *async = vq->async; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + async->buffers_packed[async->buffer_idx_packed].id = ids[i]; + async->buffers_packed[async->buffer_idx_packed].len = lens[i]; + async->buffers_packed[async->buffer_idx_packed].count = 1; + async->buffer_idx_packed++; + if (async->buffer_idx_packed >= vq->size) + async->buffer_idx_packed -= vq->size; + } +} + static __rte_always_inline void vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, uint16_t id) @@ -1451,6 +1469,58 @@ virtio_dev_rx_sync_batch_check(struct virtio_net *dev, return 0; } +static __rte_always_inline int +virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, + uint64_t *desc_addrs, + uint64_t *lens, + int16_t dma_id, + uint16_t vchan_id) +{ + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t avail_idx = vq->last_avail_idx; + uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); + uint16_t i; + + if (unlikely(avail_idx & PACKED_BATCH_MASK)) + return -1; + + if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) + return -1; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(pkts[i]->next != NULL)) + return -1; + if (unlikely(!desc_is_avail(&descs[avail_idx + i], + wrap_counter))) + return -1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + lens[i] = descs[avail_idx + i].len; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) + return -1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + desc_addrs[i] = descs[avail_idx + i].addr; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(!desc_addrs[i])) + return -1; + if (unlikely(lens[i] != descs[avail_idx + i].len)) + return -1; + } + + if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) + return -1; + + return 0; +} + static __rte_always_inline void virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -1850,6 +1920,78 @@ virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } +static __rte_always_inline void +virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, + uint64_t *desc_addrs, + uint64_t *lens) +{ + uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); + struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; + struct vring_packed_desc *descs = vq->desc_packed; + struct vhost_async *async = vq->async; + uint16_t avail_idx = vq->last_avail_idx; + uint32_t mbuf_offset = 0; + uint16_t ids[PACKED_BATCH_SIZE]; + uint64_t mapped_len[PACKED_BATCH_SIZE]; + void *host_iova[PACKED_BATCH_SIZE]; + uintptr_t desc; + uint16_t i; + + 
vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); + desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], VHOST_ACCESS_RW); + hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc; + lens[i] = pkts[i]->pkt_len + + sizeof(struct virtio_net_hdr_mrg_rxbuf); + } + + vhost_for_
[PATCH 3/3] vhost: add batch dequeue in async vhost packed ring
Add batch dequeue function in asynchronous vhost packed ring to improve the performance. Chained mbufs are not supported, it will be handled in single dequeue function. Signed-off-by: Cheng Jiang Signed-off-by: Yuan Wang --- lib/vhost/virtio_net.c | 170 - 1 file changed, 167 insertions(+), 3 deletions(-) diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index ac8c404327..9cd69fc7bf 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -450,6 +450,23 @@ vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq, } } +static __rte_always_inline void +vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t *ids) +{ + uint16_t i; + struct vhost_async *async = vq->async; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + async->buffers_packed[async->buffer_idx_packed].id = ids[i]; + async->buffers_packed[async->buffer_idx_packed].len = 0; + async->buffers_packed[async->buffer_idx_packed].count = 1; + + async->buffer_idx_packed++; + if (async->buffer_idx_packed >= vq->size) + async->buffer_idx_packed -= vq->size; + } +} + static __rte_always_inline void vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, uint16_t id) @@ -3193,6 +3210,80 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, return -1; } +static __rte_always_inline int +vhost_async_tx_batch_packed_check(struct virtio_net *dev, +struct vhost_virtqueue *vq, +struct rte_mbuf **pkts, +uint16_t avail_idx, +uintptr_t *desc_addrs, +uint64_t *lens, +uint16_t *ids, +int16_t dma_id, +uint16_t vchan_id) +{ + bool wrap = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint64_t buf_lens[PACKED_BATCH_SIZE]; + uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); + uint16_t flags, i; + + if (unlikely(avail_idx & PACKED_BATCH_MASK)) + return -1; + if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) + return -1; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + flags = descs[avail_idx + i].flags; + if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || +(wrap == !!(flags & VRING_DESC_F_USED)) || +(flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) + return -1; + } + + rte_atomic_thread_fence(__ATOMIC_ACQUIRE); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + lens[i] = descs[avail_idx + i].len; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + desc_addrs[i] = descs[avail_idx + i].addr; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(!desc_addrs[i])) + return -1; + if (unlikely((lens[i] != descs[avail_idx + i].len))) + return -1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) + goto err; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) + goto err; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + pkts[i]->pkt_len = lens[i] - buf_offset; + pkts[i]->data_len = pkts[i]->pkt_len; + ids[i] = descs[avail_idx + i].id; + } + + if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) + return -1; + + return 0; + +err: + return -1; +} + static __rte_always_inline int virtio_dev_tx_batch_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -3769,16 +3860,74 @@ virtio_dev_tx_async_single_packed(struct virtio_net *dev, return err; } +static __rte_always_inline int +virtio_dev_tx_async_packed_batch(struct virtio_net 
*dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint16_t slot_idx, + uint16_t dma_id, uint16_t vchan_id) +{ + uint16_t avail_idx = vq->last_avail_idx; + uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); + struct vhost_async *async = vq->async; + struct async_inflig
[PATCH] app/dma-perf: introduce dma-perf application
There are many high-performance DMA devices supported in DPDK now, and these DMA devices can also be integrated into other modules of DPDK as accelerators, such as Vhost. Before integrating DMA into applications, developers need to know the performance of these DMA devices in various scenarios and the performance of CPUs in the same scenario, such as different buffer lengths. Only in this way can we know the target performance of the application accelerated by using them. This patch introduces a high-performance testing tool, which supports comparing the performance of CPU and DMA in different scenarios automatically with a pre-set config file. Memory Copy performance test are supported for now. Signed-off-by: Cheng Jiang Signed-off-by: Jiayu Hu Signed-off-by: Yuan Wang Acked-by: Morten Brørup --- app/meson.build | 1 + app/test-dma-perf/benchmark.c | 539 ++ app/test-dma-perf/benchmark.h | 12 + app/test-dma-perf/config.ini | 61 app/test-dma-perf/main.c | 419 ++ app/test-dma-perf/main.h | 51 app/test-dma-perf/meson.build | 16 + 7 files changed, 1099 insertions(+) create mode 100644 app/test-dma-perf/benchmark.c create mode 100644 app/test-dma-perf/benchmark.h create mode 100644 app/test-dma-perf/config.ini create mode 100644 app/test-dma-perf/main.c create mode 100644 app/test-dma-perf/main.h create mode 100644 app/test-dma-perf/meson.build diff --git a/app/meson.build b/app/meson.build index e32ea4bd5c..a060ad2725 100644 --- a/app/meson.build +++ b/app/meson.build @@ -28,6 +28,7 @@ apps = [ 'test-regex', 'test-sad', 'test-security-perf', +'test-dma-perf', ] default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API'] diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c new file mode 100644 index 00..f6f7cc9ed3 --- /dev/null +++ b/app/test-dma-perf/benchmark.c @@ -0,0 +1,539 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#include + +#include +#include +#include +#include +#include + +#include "main.h" +#include "benchmark.h" + + +#define MAX_DMA_CPL_NB 255 + +#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" PRIu64 "\n" +#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" PRIu64 "\n" + +struct lcore_params { + uint16_t dev_id; + uint32_t nr_buf; + uint16_t kick_batch; + uint32_t buf_size; + uint32_t repeat_times; + uint16_t mpool_iter_step; + struct rte_mbuf **srcs; + struct rte_mbuf **dsts; + uint8_t scenario_id; +}; + +struct buf_info { + struct rte_mbuf **array; + uint32_t nr_buf; + uint32_t buf_size; +}; + +static struct rte_mempool *src_pool; +static struct rte_mempool *dst_pool; + +uint16_t dmadev_ids[MAX_WORKER_NB]; +uint32_t nb_dmadevs; + +extern char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN]; + +#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__) + +static inline int +__rte_format_printf(3, 4) +print_err(const char *func, int lineno, const char *format, ...) 
+{ + va_list ap; + int ret; + + ret = fprintf(stderr, "In %s:%d - ", func, lineno); + va_start(ap, format); + ret += vfprintf(stderr, format, ap); + va_end(ap); + + return ret; +} + +static inline void +calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec, + uint32_t repeat_times, uint32_t *memory, uint64_t *ave_cycle, + float *bandwidth, uint64_t *ops) +{ + *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024); + *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf); + *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 * 1000 * 1000.0); + *ops = (double)p->nr_buf * repeat_times / time_sec; +} + +static void +output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, uint64_t ave_cycle, + uint32_t buf_size, uint32_t nr_buf, uint32_t memory, + float bandwidth, uint64_t ops, bool is_dma) +{ + if (is_dma) + printf("lcore %u, DMA %u:\n" + "average cycles: %" PRIu64 "," + " buffer size: %u, nr_buf: %u," + " memory: %uMB, frequency: %" PRIu64 ".\n", + lcore_id, + dev_id, + ave_cycle, + buf_size, + nr_buf, + memory, + rte_get_timer_hz()); + else + printf("lcore %u\n" + "average cycles: %" PRIu64 "," + " buffer size: %u, nr_buf: %u," + " memory: %uMB, frequency: %" PRIu64 "
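At its core the CPU baseline reduces to cycle counting around rte_memcpy(); a standalone sketch of that idea (the tool itself additionally handles DMA submission, kick batching and CSV output):

#include <rte_cycles.h>
#include <rte_memcpy.h>

/* Time 'repeat_times' passes of CPU copies over 'nr_buf' buffers of
 * 'buf_size' bytes each; src/dst are assumed preallocated. */
static double
cpu_copy_seconds(void **src, void **dst, uint32_t nr_buf,
		uint32_t buf_size, uint32_t repeat_times)
{
	uint64_t start, cycles;
	uint32_t r, i;

	start = rte_rdtsc();
	for (r = 0; r < repeat_times; r++)
		for (i = 0; i < nr_buf; i++)
			rte_memcpy(dst[i], src[i], buf_size);
	cycles = rte_rdtsc() - start;

	return (double)cycles / rte_get_timer_hz();
}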
[PATCH 0/3] net/igc: support PTP timesync
[PATCH 1/3] code refactoring. [PATCH 2/3] add related definitions for ptp timesync. [PATCH 3/3] add IEEE1588 API to support timesync. Simei Su (3): net/igc: code refactoring net/igc/base: support PTP timesync net/igc: support IEEE 1588 PTP drivers/net/igc/base/igc_defines.h | 11 ++ drivers/net/igc/igc_ethdev.c | 222 + drivers/net/igc/igc_ethdev.h | 4 +- drivers/net/igc/igc_txrx.c | 166 --- drivers/net/igc/igc_txrx.h | 116 +++ 5 files changed, 397 insertions(+), 122 deletions(-) -- 2.9.5
[PATCH 1/3] net/igc: code refactoring
Move related structures for Rx/Tx queue from igc_txrx.c to igc_txrx.h to make the code cleaner and the variables more convenient to use. Signed-off-by: Simei Su --- drivers/net/igc/igc_txrx.c | 118 - drivers/net/igc/igc_txrx.h | 115 +++ 2 files changed, 115 insertions(+), 118 deletions(-) diff --git a/drivers/net/igc/igc_txrx.c b/drivers/net/igc/igc_txrx.c index ffd219b..c462e91 100644 --- a/drivers/net/igc/igc_txrx.c +++ b/drivers/net/igc/igc_txrx.c @@ -93,124 +93,6 @@ #define IGC_TX_OFFLOAD_NOTSUP_MASK (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK) -/** - * Structure associated with each descriptor of the RX ring of a RX queue. - */ -struct igc_rx_entry { - struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */ -}; - -/** - * Structure associated with each RX queue. - */ -struct igc_rx_queue { - struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */ - volatile union igc_adv_rx_desc *rx_ring; - /**< RX ring virtual address. */ - uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */ - volatile uint32_t *rdt_reg_addr; /**< RDT register address. */ - volatile uint32_t *rdh_reg_addr; /**< RDH register address. */ - struct igc_rx_entry *sw_ring; /**< address of RX software ring. */ - struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */ - struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */ - uint16_t nb_rx_desc; /**< number of RX descriptors. */ - uint16_t rx_tail; /**< current value of RDT register. */ - uint16_t nb_rx_hold; /**< number of held free RX desc. */ - uint16_t rx_free_thresh; /**< max free RX desc to hold. */ - uint16_t queue_id; /**< RX queue index. */ - uint16_t reg_idx; /**< RX queue register index. */ - uint16_t port_id; /**< Device port identifier. */ - uint8_t pthresh; /**< Prefetch threshold register. */ - uint8_t hthresh; /**< Host threshold register. */ - uint8_t wthresh; /**< Write-back threshold register. */ - uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ - uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */ - uint32_t flags; /**< RX flags. */ - uint64_t offloads; /**< offloads of RTE_ETH_RX_OFFLOAD_* */ -}; - -/** Offload features */ -union igc_tx_offload { - uint64_t data; - struct { - uint64_t l3_len:9; /**< L3 (IP) Header Length. */ - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ - uint64_t vlan_tci:16; - /**< VLAN Tag Control Identifier (CPU order). */ - uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ - uint64_t tso_segsz:16; /**< TCP TSO segment size. */ - /* uint64_t unused:8; */ - }; -}; - -/* - * Compare mask for igc_tx_offload.data, - * should be in sync with igc_tx_offload layout. - */ -#define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */ -#define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */ -#define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */ -#define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */ -/** Mac + IP + TCP + Mss mask. */ -#define TX_TSO_CMP_MASK \ - (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK) - -/** - * Structure to check if a new context needs to be built - */ -struct igc_advctx_info { - uint64_t flags; /**< ol_flags related to context build. */ - /** tx offload: vlan, tso, l2-l3-l4 lengths. */ - union igc_tx_offload tx_offload; - /** compare mask for tx offload. */ - union igc_tx_offload tx_offload_mask; -}; - -/** - * Hardware context number - */ -enum { - IGC_CTX_0 = 0, /**< CTX0 */ - IGC_CTX_1 = 1, /**< CTX1 */ - IGC_CTX_NUM = 2, /**< CTX_NUM */ -}; - -/** - * Structure associated with each descriptor of the TX ring of a TX queue. - */ -struct igc_tx_entry { - struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */ - uint16_t next_id; /**< Index of next descriptor in ring. */ - uint16_t last_id; /**< Index of last scattered descriptor. */ -}; - -/** - * Structure associated with each TX queue. - */ -struct igc_tx_queue { - volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */ - uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */ - struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */ - volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */ - uint32_t txd_type; /**< Device-specific TXD t
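As an aside, the moved igc_tx_offload union and its compare masks implement a small but useful trick: all per-packet offload metadata fits in one 64-bit word, so deciding whether a new hardware context descriptor must be programmed reduces to one masked integer compare. A standalone sketch of that technique (names are illustrative, not the driver's own):

#include <stdbool.h>
#include <stdint.h>

union tx_offload {
	uint64_t data;
	struct {
		uint64_t l3_len:9;     /* L3 (IP) header length */
		uint64_t l2_len:7;     /* L2 (MAC) header length */
		uint64_t vlan_tci:16;  /* VLAN tag (CPU order) */
		uint64_t l4_len:8;     /* L4 (TCP/UDP) header length */
		uint64_t tso_segsz:16; /* TCP TSO segment size */
	};
};

/* Cached context: what was last programmed into the hardware. */
struct ctx_cache {
	uint64_t flags;        /* ol_flags relevant to the context */
	union tx_offload off;  /* last offload values */
	union tx_offload mask; /* which bits of 'off' are meaningful */
};

static bool
need_new_context(const struct ctx_cache *c, uint64_t flags,
		 union tx_offload off)
{
	/* A new context descriptor is needed when the flags differ or
	 * when the masked offload fields differ from the cached ones. */
	return flags != c->flags ||
	       (off.data & c->mask.data) != (c->off.data & c->mask.data);
}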
[PATCH 2/3] net/igc/base: support PTP timesync
Add definitions for timesync enabling. Signed-off-by: Simei Su --- drivers/net/igc/base/igc_defines.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/igc/base/igc_defines.h b/drivers/net/igc/base/igc_defines.h index 61964bc..dd7330a 100644 --- a/drivers/net/igc/base/igc_defines.h +++ b/drivers/net/igc/base/igc_defines.h @@ -795,6 +795,17 @@ #define TSYNC_INTERRUPTS TSINTR_TXTS +/* Split Replication Receive Control */ +#define IGC_SRRCTL_TIMESTAMP 0x40000000 +#define IGC_SRRCTL_TIMER1SEL(timer) (((timer) & 0x3) << 14) +#define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17) + +/* Sample RX tstamp in PHY sop */ +#define IGC_TSYNCRXCTL_RXSYNSIG 0x00000400 + +/* Sample TX tstamp in PHY sop */ +#define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 + /* TSAUXC Configuration Bits */ #define TSAUXC_EN_TT0 (1 << 0) /* Enable target time 0. */ #define TSAUXC_EN_TT1 (1 << 1) /* Enable target time 1. */ -- 2.9.5
[PATCH 3/3] net/igc: support IEEE 1588 PTP
Add igc support for new ethdev APIs to enable/disable and read/write/adjust IEEE1588 PTP timestamps. An example command for running ptpclient: ./build/examples/dpdk-ptpclient -c 1 -n 3 -- -T 0 -p 0x1 Signed-off-by: Simei Su --- drivers/net/igc/igc_ethdev.c | 222 +++ drivers/net/igc/igc_ethdev.h | 4 +- drivers/net/igc/igc_txrx.c | 50 +- drivers/net/igc/igc_txrx.h | 1 + 4 files changed, 272 insertions(+), 5 deletions(-) diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c index dcd262f..ef3346b 100644 --- a/drivers/net/igc/igc_ethdev.c +++ b/drivers/net/igc/igc_ethdev.c @@ -78,6 +78,16 @@ #define IGC_ALARM_INTERVAL 8000000u /* us, about 13.6s some per-queue registers will wrap around back to 0. */ +/* Transmit and receive latency (for PTP timestamps) */ +#define IGC_I225_TX_LATENCY_10 240 +#define IGC_I225_TX_LATENCY_100 58 +#define IGC_I225_TX_LATENCY_1000 80 +#define IGC_I225_TX_LATENCY_2500 1325 +#define IGC_I225_RX_LATENCY_10 6450 +#define IGC_I225_RX_LATENCY_100 185 +#define IGC_I225_RX_LATENCY_1000 300 +#define IGC_I225_RX_LATENCY_2500 1485 + static const struct rte_eth_desc_lim rx_desc_lim = { .nb_max = IGC_MAX_RXD, .nb_min = IGC_MIN_RXD, @@ -245,6 +255,18 @@ eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on); static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask); static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev, enum rte_vlan_type vlan_type, uint16_t tpid); +static int eth_igc_timesync_enable(struct rte_eth_dev *dev); +static int eth_igc_timesync_disable(struct rte_eth_dev *dev); +static int eth_igc_timesync_read_rx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t flags); +static int eth_igc_timesync_read_tx_timestamp(struct rte_eth_dev *dev, + struct timespec *timestamp); +static int eth_igc_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta); +static int eth_igc_timesync_read_time(struct rte_eth_dev *dev, + struct timespec *timestamp); +static int eth_igc_timesync_write_time(struct rte_eth_dev *dev, + const struct timespec *timestamp); static const struct eth_dev_ops eth_igc_ops = { .dev_configure = eth_igc_configure, @@ -298,6 +320,13 @@ static const struct eth_dev_ops eth_igc_ops = { .vlan_tpid_set = eth_igc_vlan_tpid_set, .vlan_strip_queue_set = eth_igc_vlan_strip_queue_set, .flow_ops_get = eth_igc_flow_ops_get, + .timesync_enable = eth_igc_timesync_enable, + .timesync_disable = eth_igc_timesync_disable, + .timesync_read_rx_timestamp = eth_igc_timesync_read_rx_timestamp, + .timesync_read_tx_timestamp = eth_igc_timesync_read_tx_timestamp, + .timesync_adjust_time = eth_igc_timesync_adjust_time, + .timesync_read_time = eth_igc_timesync_read_time, + .timesync_write_time = eth_igc_timesync_write_time, }; /* @@ -2582,6 +2611,199 @@ eth_igc_vlan_tpid_set(struct rte_eth_dev *dev, } static int +eth_igc_timesync_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + struct timespec system_time; + struct igc_rx_queue *rxq; + uint32_t val; + uint16_t i; + + IGC_WRITE_REG(hw, IGC_TSAUXC, 0x0); + + clock_gettime(CLOCK_REALTIME, &system_time); + IGC_WRITE_REG(hw, IGC_SYSTIML, system_time.tv_nsec); + IGC_WRITE_REG(hw, IGC_SYSTIMH, system_time.tv_sec); + + /* Enable timestamping of received PTP packets.
*/ + val = IGC_READ_REG(hw, IGC_RXPBS); + val |= IGC_RXPBS_CFG_TS_EN; + IGC_WRITE_REG(hw, IGC_RXPBS, val); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + val = IGC_READ_REG(hw, IGC_SRRCTL(i)); + /* For now, only support retrieving Rx timestamp from timer0. */ + val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) | + IGC_SRRCTL_TIMESTAMP; + IGC_WRITE_REG(hw, IGC_SRRCTL(i), val); + } + + val = IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_ALL | + IGC_TSYNCRXCTL_RXSYNSIG; + IGC_WRITE_REG(hw, IGC_TSYNCRXCTL, val); + + /* Enable Timestamping of transmitted PTP packets. */ + IGC_WRITE_REG(hw, IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | + IGC_TSYNCTXCTL_TXSYNSIG); + + /* Read TXSTMP registers to discard any timestamp previously stored. */ + IGC_READ_REG(hw, IGC_TXSTMPL); + IGC_READ_REG(hw, IGC_TXSTMPH); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + rxq->offlo
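For readers wiring this into an application: the latency constants added above exist because the MAC latches a timestamp some time after the packet is actually on the wire. A self-contained sketch of the compensation idea (the helper and the subtract-on-Rx convention are assumptions for illustration, not the PMD's exact code):

#include <stdint.h>

/* Rx latency in nanoseconds per link speed (values from the patch). */
#define IGC_I225_RX_LATENCY_10    6450
#define IGC_I225_RX_LATENCY_100    185
#define IGC_I225_RX_LATENCY_1000   300
#define IGC_I225_RX_LATENCY_2500  1485

static uint64_t
rx_latency_ns(uint32_t link_speed_mbps)
{
	switch (link_speed_mbps) {
	case 10:   return IGC_I225_RX_LATENCY_10;
	case 100:  return IGC_I225_RX_LATENCY_100;
	case 1000: return IGC_I225_RX_LATENCY_1000;
	default:   return IGC_I225_RX_LATENCY_2500;
	}
}

/* Subtract the PHY/MAC receive latency so the timestamp approximates
 * the wire arrival time rather than the moment the MAC latched it. */
static uint64_t
adjust_rx_timestamp(uint64_t raw_ns, uint32_t link_speed_mbps)
{
	return raw_ns - rx_latency_ns(link_speed_mbps);
}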
[PATCH 0/2] net/igc: support Tx timestamp offload
[PATCH 1/2] adds the definitions needed to support Tx timestamp offload. [PATCH 2/2] enables Tx timestamp offload by "Launch Time". Simei Su (2): net/igc/base: support Tx timestamp offload net/igc: enable Tx timestamp offload drivers/net/igc/base/igc_defines.h | 9 + drivers/net/igc/base/igc_regs.h | 8 + drivers/net/igc/igc_ethdev.c | 70 ++ drivers/net/igc/igc_ethdev.h | 6 +++- drivers/net/igc/igc_txrx.c | 58 ++- drivers/net/igc/igc_txrx.h | 3 ++ 6 files changed, 145 insertions(+), 9 deletions(-) -- 2.9.5
[PATCH 1/2] net/igc/base: support Tx timestamp offload
Add definitions for Tx timestamp offload "RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP". Signed-off-by: Simei Su --- drivers/net/igc/base/igc_defines.h | 9 + drivers/net/igc/base/igc_regs.h | 8 + 2 files changed, 17 insertions(+) diff --git a/drivers/net/igc/base/igc_defines.h b/drivers/net/igc/base/igc_defines.h index dd7330a..280570b 100644 --- a/drivers/net/igc/base/igc_defines.h +++ b/drivers/net/igc/base/igc_defines.h @@ -188,6 +188,15 @@ #define IGC_RCTL_BSEX 0x02000000 /* Buffer size extension */ #define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ +#define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ +#define IGC_TXPBSIZE_TSN 0x04145145 /* 5k bytes buffer for each queue */ + +/* Transmit Scheduling */ +#define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 +#define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 + +#define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001 + /* Use byte values for the following shift parameters * Usage: * psrctl |= (((ROUNDUP(value0, 128) >> IGC_PSRCTL_BSIZE0_SHIFT) & diff --git a/drivers/net/igc/base/igc_regs.h b/drivers/net/igc/base/igc_regs.h index d424387..e423814 100644 --- a/drivers/net/igc/base/igc_regs.h +++ b/drivers/net/igc/base/igc_regs.h @@ -602,6 +602,14 @@ #define IGC_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ #define IGC_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ +#define IGC_QBVCYCLET 0x331C +#define IGC_QBVCYCLET_S 0x3320 +#define IGC_STQT(_n) (0x3324 + 0x4 * (_n)) +#define IGC_ENDQT(_n) (0x3334 + 0x4 * (_n)) +#define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n)) +#define IGC_BASET_L 0x3314 +#define IGC_BASET_H 0x3318 + /* Filtering Registers */ #define IGC_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ #define IGC_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ -- 2.9.5
[PATCH 2/2] net/igc: enable Tx timestamp offload
This patch supports Tx timestamp offload by leveraging the NIC's "Launch Time" for "RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP". Signed-off-by: Simei Su --- drivers/net/igc/igc_ethdev.c | 70 ++ drivers/net/igc/igc_ethdev.h | 6 +++- drivers/net/igc/igc_txrx.c | 58 +++- drivers/net/igc/igc_txrx.h | 3 ++ 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c index ef3346b..28f6cd5 100644 --- a/drivers/net/igc/igc_ethdev.c +++ b/drivers/net/igc/igc_ethdev.c @@ -88,6 +88,9 @@ #define IGC_I225_RX_LATENCY_1000 300 #define IGC_I225_RX_LATENCY_2500 1485 +uint64_t igc_timestamp_dynflag; +int igc_timestamp_dynfield_offset = -1; + static const struct rte_eth_desc_lim rx_desc_lim = { .nb_max = IGC_MAX_RXD, .nb_min = IGC_MIN_RXD, @@ -267,6 +270,7 @@ static int eth_igc_timesync_read_time(struct rte_eth_dev *dev, struct timespec *timestamp); static int eth_igc_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *timestamp); +static int eth_igc_read_clock(struct rte_eth_dev *dev, uint64_t *clock); static const struct eth_dev_ops eth_igc_ops = { .dev_configure = eth_igc_configure, @@ -327,6 +331,7 @@ static const struct eth_dev_ops eth_igc_ops = { .timesync_adjust_time = eth_igc_timesync_adjust_time, .timesync_read_time = eth_igc_timesync_read_time, .timesync_write_time = eth_igc_timesync_write_time, + .read_clock = eth_igc_read_clock, }; /* @@ -949,7 +954,12 @@ eth_igc_start(struct rte_eth_dev *dev) struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev); struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = pci_dev->intr_handle; + uint32_t nsec, sec, baset_l, baset_h, tqavctrl; + struct timespec system_time; + int64_t n, systime; + uint32_t txqctl = 0; uint32_t *speeds; + uint16_t i; int ret; PMD_INIT_FUNC_TRACE(); @@ -1009,6 +1019,55 @@ eth_igc_start(struct rte_eth_dev *dev) return ret; } + if (igc_timestamp_dynflag > 0) { + adapter->base_time = 0; + adapter->cycle_time = NSEC_PER_SEC; + + IGC_WRITE_REG(hw, IGC_TSSDP, 0); + IGC_WRITE_REG(hw, IGC_TSIM, TSINTR_TXTS); + IGC_WRITE_REG(hw, IGC_IMS, IGC_ICR_TS); + + IGC_WRITE_REG(hw, IGC_TSAUXC, 0); + IGC_WRITE_REG(hw, IGC_I350_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); + IGC_WRITE_REG(hw, IGC_TXPBS, IGC_TXPBSIZE_TSN); + + tqavctrl = IGC_READ_REG(hw, IGC_I210_TQAVCTRL); + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | + IGC_TQAVCTRL_ENHANCED_QAV; + IGC_WRITE_REG(hw, IGC_I210_TQAVCTRL, tqavctrl); + + IGC_WRITE_REG(hw, IGC_QBVCYCLET_S, adapter->cycle_time); + IGC_WRITE_REG(hw, IGC_QBVCYCLET, adapter->cycle_time); + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + IGC_WRITE_REG(hw, IGC_STQT(i), 0); + IGC_WRITE_REG(hw, IGC_ENDQT(i), NSEC_PER_SEC); + + txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; + IGC_WRITE_REG(hw, IGC_TXQCTL(i), txqctl); + } + + clock_gettime(CLOCK_REALTIME, &system_time); + IGC_WRITE_REG(hw, IGC_SYSTIML, system_time.tv_nsec); + IGC_WRITE_REG(hw, IGC_SYSTIMH, system_time.tv_sec); + + nsec = IGC_READ_REG(hw, IGC_SYSTIML); + sec = IGC_READ_REG(hw, IGC_SYSTIMH); + systime = (int64_t)sec * NSEC_PER_SEC + (int64_t)nsec; + + if (systime > adapter->base_time) { + n = (systime - adapter->base_time) / adapter->cycle_time; + adapter->base_time = adapter->base_time + (n + 1) * adapter->cycle_time; + } + + baset_h = adapter->base_time / NSEC_PER_SEC; + baset_l = adapter->base_time % NSEC_PER_SEC; + IGC_WRITE_REG(hw, IGC_BASET_H, baset_h); + IGC_WRITE_REG(hw, IGC_BASET_L, baset_l); + } + igc_clear_hw_cntrs_base_generic(hw); /* VLAN Offload Settings */ @@ -2804,6
+2863,17 @@ eth_igc_timesync_disable(struct rte_eth_dev *dev) } static int +eth_igc_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock) +{ + struct timespec system_time; + + clock_gettime(CLOCK_REALTIME, &system_time); + *clock = system_time.tv_sec * NSEC_PER_SEC + system_time.tv_nsec; + + return 0; +} + +static int eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pc
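From the application side, the offload added by this series is consumed through the mbuf dynamic timestamp field and flag. A hedged sketch (the lookup names are the standard ones from rte_mbuf_dyn.h; registration timing is PMD-specific, so the lookups are done after the port is started with the offload enabled):

#include <rte_ethdev.h>
#include <rte_mbuf_dyn.h>

static int ts_field_off = -1; /* mbuf dynamic field offset */
static uint64_t ts_flag_mask; /* mbuf dynamic flag bit */

static int
launch_time_setup(void)
{
	int flag_id;

	/* The PMD registers the timestamp dynfield/dynflag once
	 * RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP is enabled, so look
	 * them up only after the port has been configured/started. */
	ts_field_off = rte_mbuf_dynfield_lookup(
			RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	flag_id = rte_mbuf_dynflag_lookup(
			RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	if (ts_field_off < 0 || flag_id < 0)
		return -1;
	ts_flag_mask = 1ULL << flag_id;
	return 0;
}

/* Request transmission of 'm' at absolute time 'launch_ns'. */
static void
set_launch_time(struct rte_mbuf *m, uint64_t launch_ns)
{
	*RTE_MBUF_DYNFIELD(m, ts_field_off, uint64_t *) = launch_ns;
	m->ol_flags |= ts_flag_mask;
}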
[PATCH] app/crypto-perf: fix number of segments
When a segment size is provided, the total number of segments is calculated from it. Segment-size reductions due to headroom/tailroom need to be accounted for when determining the total number of segments required. Fixes: c1670ae0022b ("app/crypto-perf: honour min headroom/tailroom") Signed-off-by: Anoob Joseph Signed-off-by: Akhil Goyal --- app/test-crypto-perf/cperf_test_common.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c index 00aadc9a47..cc03b33ac5 100644 --- a/app/test-crypto-perf/cperf_test_common.c +++ b/app/test-crypto-perf/cperf_test_common.c @@ -198,9 +198,11 @@ cperf_alloc_common_memory(const struct cperf_options *options, RTE_CACHE_LINE_ROUNDUP(crypto_op_total_size); uint32_t mbuf_size = sizeof(struct rte_mbuf) + options->segment_sz; uint32_t max_size = options->max_buffer_size + options->digest_sz; - uint16_t segments_nb = (max_size % options->segment_sz) ? - (max_size / options->segment_sz) + 1 : - max_size / options->segment_sz; + uint32_t segment_data_len = options->segment_sz - options->headroom_sz - + options->tailroom_sz; + uint16_t segments_nb = (max_size % segment_data_len) ? + (max_size / segment_data_len) + 1 : + (max_size / segment_data_len); uint32_t obj_size = crypto_op_total_size_padded + (mbuf_size * segments_nb); -- 2.25.1
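A quick worked example of why dividing by the raw segment size undercounts (numbers are hypothetical):

#include <stdint.h>

/* E.g. segment_sz = 1024, headroom = 128, tailroom = 64, and a max
 * buffer (plus digest) of 2048 bytes: each mbuf really holds only
 * 1024 - 128 - 64 = 832 data bytes. */
static uint16_t
segments_needed(uint32_t max_size, uint32_t segment_sz,
		uint32_t headroom, uint32_t tailroom)
{
	uint32_t segment_data_len = segment_sz - headroom - tailroom;

	/* Round-up division, equivalent to the patch's modulo form. */
	return (max_size + segment_data_len - 1) / segment_data_len;
}
/* segments_needed(2048, 1024, 128, 64) == 3, while dividing by the raw
 * segment_sz would give only 2 and overflow the segment chain. */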
RE: [PATCH] net/iavf: fix slow memory allocation
> -----Original Message----- > From: Ferruh Yigit > Sent: December 13, 2022 21:28 > To: You, KaisenX ; dev@dpdk.org; Burakov, > Anatoly ; David Marchand > > Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX > ; Wu, Jingjing ; Xing, > Beilei ; Zhang, Qi Z ; Luca > Boccassi ; Mcnamara, John > ; Kevin Traynor > Subject: Re: [PATCH] net/iavf: fix slow memory allocation > > On 12/13/2022 9:35 AM, Ferruh Yigit wrote: > > On 12/13/2022 7:52 AM, You, KaisenX wrote: > >> > >> > >>> -----Original Message----- > >>> From: Ferruh Yigit > >>> Sent: December 8, 2022 23:04 > >>> To: You, KaisenX ; dev@dpdk.org; Burakov, > >>> Anatoly ; David Marchand > >>> > >>> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, > >>> YidingX ; Wu, Jingjing > >>> ; Xing, Beilei ; > >>> Zhang, Qi Z ; Luca Boccassi > >>> ; Mcnamara, John ; > Kevin > >>> Traynor > >>> Subject: Re: [PATCH] net/iavf: fix slow memory allocation > >>> > >>> On 11/17/2022 6:57 AM, Kaisen You wrote: > In some cases, DPDK does not allocate hugepage heap memory to > >>> some > sockets due to the user setting parameters (e.g. -l 40-79, SOCKET 0 > has no memory). > When the interrupt thread runs on the corresponding core of this > socket, each allocation/release will execute a whole set of heap > allocation/release operations, resulting in poor performance. > Instead we call malloc() to get memory from the system's heap space > to fix this problem. > > >>> > >>> Hi Kaisen, > >>> > >>> Using libc malloc can improve performance for this case, but I would > >>> like to understand the root cause of the problem. > >>> > >>> > >>> As far as I can see, interrupt callbacks are run by the interrupt thread > >>> ("eal-intr-thread"), and the interrupt thread is created by the > 'rte_ctrl_thread_create()' API. > >>> > >>> The 'rte_ctrl_thread_create()' comment mentions that "CPU affinity is > >>> retrieved at the time 'rte_eal_init()' was called," > >>> > >>> And 'rte_eal_init()' is run on the main lcore, which is the first lcore > >>> in the core list (unless otherwise defined with --main-lcore). > >>> > >>> So, the interrupts should be running on a core that has hugepages > >>> allocated for it; am I missing something here? > >>> > >>> > >> Thanks for your comments. Let me try to explain the root cause here: > >> for the eal-intr-thread, the CPU in the corresponding slot does not have a > >> memory pool created. That results in frequent memory creation/destruction afterwards. > >> > >> When testpmd is started, the parameter (e.g. -l 40-79) is set. > >> Different OSes have different topologies. Some OSes like SUSE only create a > >> memory pool for one CPU slot, while other systems create them for two. > >> That is why the problem occurs when using memory on different OSes. > > > > > > It is the testpmd application that decides which socket to allocate > > memory from, right. This is nothing specific to the OS. > > > > As far as I remember, testpmd's logic is to allocate from the sockets that > > its cores are on (provided with the -l parameter), and to allocate from the > > socket that the device is attached to. > > > > So, in a dual socket system, if all used cores are in socket 1 and the > > NIC is in socket 1, no memory is allocated for socket 0. This is to > > optimize memory consumption. > > > > > > Can you please confirm that the problem you are observing is because the > > interrupt handler is running on a CPU which doesn't have memory > > allocated for its socket? > > > > In this case what I don't understand is why the interrupt thread is not running > > on the main lcore, which should be the first core in the list; for the "-l 40-79" > > sample it should be lcore 40.
> > For your case, is the interrupt handler running on core 0? Or on an arbitrary core? > If so, can you please confirm whether providing the core list as "-l 0,40-79" > fixes the issue? > First of all, sorry for the late reply. I can confirm that the problem I observed occurs because the interrupt handler is running on a CPU whose socket has no memory allocated. In my case, the interrupt handler is running on core 0. I tried providing "-l 0,40-79" as a startup parameter, and this resolves the issue. I would also like to correct my previous statement: this problem does not only occur on SUSE systems. On any OS, this problem occurs as long as the range of startup parameters covers only node 1. > > > >>> > >>> > >>> And what about using the 'rte_malloc_socket()' API (instead of > >>> rte_malloc), which gets 'socket' as a parameter, and providing the > >>> socket that the device is on as the parameter to this API? Is it possible to test > this? > >>> > >>> > >> As to the reason for not using rte_malloc_socket(): I thought > >> rte_malloc_socket() could solve the problem too. And the appropriate > >> parameter should be the socket_id that created the memory pool for > >> DPDK initialization. Assuming that the socket_id of the initially > >> allocated memory = 1, first let the eal-intr-thread > >> determine if it is on that socket_id, then record this socket_id in > >> the eal-intr-thread and pass it
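For reference, the rte_malloc_socket() alternative discussed in this thread would look roughly like the sketch below; the socket id holding the DPDK heap is assumed to be known by the caller (e.g. the device's socket), and the allocation name is hypothetical:

#include <rte_malloc.h>

/*
 * Allocate from the socket that actually has DPDK heap memory,
 * instead of the interrupt thread's own (possibly memory-less)
 * socket, avoiding repeated heap grow/shrink cycles.
 */
static void *
intr_safe_zalloc(size_t size, int dev_socket_id)
{
	return rte_zmalloc_socket("iavf_intr", size, 0, dev_socket_id);
}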
[PATCH v2 0/3] support matching ICMPv6 ID and sequence
Currently, the rte_flow API does not support matching the ID and sequence fields of ICMPv6 echo packets. This patchset adds that support: it adds the needed API in rte_flow and provides the corresponding implementation for the mlx5 PMD. Leo Xu (3): ethdev: add ICMPv6 ID and sequence net/mlx5: add ICMPv6 ID and sequence match support net/mlx5/hws: add ICMPv6 ID and sequence match support --- v2: * rebase 23.03 app/test-pmd/cmdline_flow.c | 70 doc/guides/nics/mlx5.rst | 2 +- doc/guides/prog_guide/rte_flow.rst | 14 doc/guides/rel_notes/release_23_03.rst | 10 +++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 10 +++ drivers/net/mlx5/hws/mlx5dr_definer.c | 88 + drivers/net/mlx5/mlx5_flow.c | 61 ++ drivers/net/mlx5/mlx5_flow.h | 4 + drivers/net/mlx5/mlx5_flow_dv.c | 76 ++ drivers/net/mlx5/mlx5_flow_hw.c | 2 + lib/ethdev/rte_flow.c | 4 + lib/ethdev/rte_flow.h | 25 ++ lib/net/meson.build | 1 + lib/net/rte_icmp6.h | 48 +++ 14 files changed, 414 insertions(+), 1 deletion(-) create mode 100644 lib/net/rte_icmp6.h -- 2.27.0
[PATCH v2 1/3] ethdev: add ICMPv6 ID and sequence
This patch adds API support for ICMPv6 ID and sequence. 1: Add two new pattern item types for ICMPv6 echo request and reply: RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY 2: Add new header file rte_icmp6.h for ICMPv6 packet definitions. 3: Enhance testpmd flow pattern to support ICMPv6 identifier and sequence. Example of ICMPv6 echo pattern in testpmd command: pattern eth / ipv6 / icmp6_echo_request / end pattern eth / ipv6 / icmp6_echo_reply / end pattern eth / ipv6 / icmp6_echo_request ident is 20 seq is 30 / end Signed-off-by: Leo Xu Signed-off-by: Bing Zhao --- app/test-pmd/cmdline_flow.c | 70 + doc/guides/prog_guide/rte_flow.rst | 14 + doc/guides/rel_notes/release_23_03.rst | 4 ++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 10 +++ lib/ethdev/rte_flow.c | 4 ++ lib/ethdev/rte_flow.h | 25 lib/net/meson.build | 1 + lib/net/rte_icmp6.h | 48 ++ 8 files changed, 176 insertions(+) create mode 100644 lib/net/rte_icmp6.h diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 88108498e0..7dc1528899 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -360,6 +360,12 @@ enum index { ITEM_ICMP6, ITEM_ICMP6_TYPE, ITEM_ICMP6_CODE, + ITEM_ICMP6_ECHO_REQUEST, + ITEM_ICMP6_ECHO_REQUEST_ID, + ITEM_ICMP6_ECHO_REQUEST_SEQ, + ITEM_ICMP6_ECHO_REPLY, + ITEM_ICMP6_ECHO_REPLY_ID, + ITEM_ICMP6_ECHO_REPLY_SEQ, ITEM_ICMP6_ND_NS, ITEM_ICMP6_ND_NS_TARGET_ADDR, ITEM_ICMP6_ND_NA, @@ -1327,6 +1333,8 @@ static const enum index next_item[] = { ITEM_IPV6_EXT, ITEM_IPV6_FRAG_EXT, ITEM_ICMP6, + ITEM_ICMP6_ECHO_REQUEST, + ITEM_ICMP6_ECHO_REPLY, ITEM_ICMP6_ND_NS, ITEM_ICMP6_ND_NA, ITEM_ICMP6_ND_OPT, @@ -1575,6 +1583,20 @@ static const enum index item_icmp6[] = { ZERO, }; +static const enum index item_icmp6_echo_request[] = { + ITEM_ICMP6_ECHO_REQUEST_ID, + ITEM_ICMP6_ECHO_REQUEST_SEQ, + ITEM_NEXT, + ZERO, +}; + +static const enum index item_icmp6_echo_reply[] = { + ITEM_ICMP6_ECHO_REPLY_ID, + ITEM_ICMP6_ECHO_REPLY_SEQ, + ITEM_NEXT, + ZERO, +}; + static const enum index item_icmp6_nd_ns[] = { ITEM_ICMP6_ND_NS_TARGET_ADDR, ITEM_NEXT, @@ -4323,6 +4345,54 @@ static const struct token token_list[] = { .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6, code)), }, + [ITEM_ICMP6_ECHO_REQUEST] = { + .name = "icmp6_echo_request", + .help = "match ICMPv6 echo request", + .priv = PRIV_ITEM(ICMP6_ECHO_REQUEST, + sizeof(struct rte_flow_item_icmp6_echo)), + .next = NEXT(item_icmp6_echo_request), + .call = parse_vc, + }, + [ITEM_ICMP6_ECHO_REQUEST_ID] = { + .name = "ident", + .help = "ICMPv6 echo request identifier", + .next = NEXT(item_icmp6_echo_request, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo, +echo.identifier)), + }, + [ITEM_ICMP6_ECHO_REQUEST_SEQ] = { + .name = "seq", + .help = "ICMPv6 echo request sequence", + .next = NEXT(item_icmp6_echo_request, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo, +echo.sequence)), + }, + [ITEM_ICMP6_ECHO_REPLY] = { + .name = "icmp6_echo_reply", + .help = "match ICMPv6 echo reply", + .priv = PRIV_ITEM(ICMP6_ECHO_REPLY, + sizeof(struct rte_flow_item_icmp6_echo)), + .next = NEXT(item_icmp6_echo_reply), + .call = parse_vc, + }, + [ITEM_ICMP6_ECHO_REPLY_ID] = { + .name = "ident", + .help = "ICMPv6 echo reply identifier", + .next = NEXT(item_icmp6_echo_reply, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo, +echo.identifier)), + }, + 
[ITEM_ICMP6_ECHO_REPLY_SEQ] = { + .name = "seq", + .help = "ICMPv6 echo reply sequence", + .next = NEXT(item_icmp6_echo_reply, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_icmp6_echo
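To illustrate the new item from an application's point of view, a minimal sketch (error handling trimmed; the drop rule is an example, not part of the patch) that drops ICMPv6 echo requests carrying a given identifier:

#include <rte_flow.h>

static struct rte_flow *
drop_echo_ident(uint16_t port_id, rte_be16_t ident)
{
	/* Match only the identifier; type/code are implied by the item. */
	struct rte_flow_item_icmp6_echo spec = { .echo.identifier = ident };
	struct rte_flow_item_icmp6_echo mask = {
		.echo.identifier = RTE_BE16(0xffff),
	};
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV6 },
		{ .type = RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST,
		  .spec = &spec, .mask = &mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_create(port_id, &attr, pattern, actions, &err);
}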
[PATCH v2 2/3] net/mlx5: add ICMPv6 ID and sequence match support
This patch adds ICMPv6 ID and sequence match support. Since the type and code of an ICMPv6 echo packet are already specified by the item type: RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY the mlx5 PMD will set the appropriate type and code automatically: Echo request: type(128), code(0) Echo reply: type(129), code(0) The type and code provided by the application will be ignored. Signed-off-by: Leo Xu --- doc/guides/nics/mlx5.rst | 2 +- doc/guides/rel_notes/release_23_03.rst | 6 ++ drivers/net/mlx5/mlx5_flow.c | 61 + drivers/net/mlx5/mlx5_flow.h | 4 ++ drivers/net/mlx5/mlx5_flow_dv.c | 76 ++ drivers/net/mlx5/mlx5_flow_hw.c | 2 + 6 files changed, 150 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 51f51259e3..78693d19b0 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -405,7 +405,7 @@ Limitations - The input buffer, providing the removal size, is not validated. - The buffer size must match the length of the headers to be removed. -- ICMP(code/type/identifier/sequence number) / ICMP6(code/type) matching, IP-in-IP and MPLS flow matching are all +- ICMP(code/type/identifier/sequence number) / ICMP6(code/type/identifier/sequence number) matching, IP-in-IP and MPLS flow matching are all mutually exclusive features which cannot be supported together (see :ref:`mlx5_firmware_config`). diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 5af9c43dd9..011c2489f7 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -60,6 +60,12 @@ New Features Added ``icmp6_echo`` item in rte_flow to support ID and sequence matching in ICMPv6 echo request/reply packets. +* **Updated Mellanox mlx5 driver.** + + Updated the Mellanox mlx5 driver with new features and improvements, including: + + * Added support for matching on ICMPv6 ID and sequence fields. + Removed Items - diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index a0cf677fb0..8eea78251e 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -2352,6 +2352,67 @@ mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, return 0; } +/** + * Validate ICMP6 echo request/reply item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that hold the items detected until now. + * @param[in] target_protocol + * The next protocol from the previous layer. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_validate_item_icmp6_echo(const struct rte_flow_item *item, + uint64_t item_flags, + uint8_t target_protocol, + struct rte_flow_error *error) +{ + const struct rte_flow_item_icmp6_echo *mask = item->mask; + const struct rte_flow_item_icmp6_echo nic_mask = { + .echo.hdr.type = 0xff, + .echo.hdr.code = 0xff, + .echo.identifier = RTE_BE16(0xffff), + .echo.sequence = RTE_BE16(0xffff), + }; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; + const uint64_t l4m = tunnel ?
MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; + int ret; + + if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "protocol filtering not compatible" + " with ICMP6 layer"); + if (!(item_flags & l3m)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "IPv6 is mandatory to filter on" + " ICMP6"); + if (item_flags & l4m) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple L4 layers not supported"); + if (!mask) + mask = &nic_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, +(const uint8_t *)&nic_mask, +sizeof(struct rte_flow_item_icmp6_echo), +MLX5_ITEM_RANGE_NOT_ACCEPTED, error); + if (ret < 0) + return ret; + return 0; +} + /** *
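Applications can probe for this support before creating rules; a brief sketch using the standard validation entry point (which, on mlx5, exercises the checks added above):

#include <rte_flow.h>

static int
echo_match_supported(uint16_t port_id, const struct rte_flow_attr *attr,
		     const struct rte_flow_item *pattern,
		     const struct rte_flow_action *actions)
{
	struct rte_flow_error err;

	/* Returns 0 when the PMD accepts the rule, a negative errno
	 * (with 'err' filled in) when it does not. */
	return rte_flow_validate(port_id, attr, pattern, actions, &err);
}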
[PATCH v2 3/3] net/mlx5/hws: add ICMPv6 ID and sequence match support
This patch adds ICMPv6 ID and sequence match support for HWS. Since the type and code of an ICMPv6 echo packet are already specified by the item type: RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY the mlx5 PMD will set the appropriate type and code automatically: Echo request: type(128), code(0) Echo reply: type(129), code(0) The type and code provided by the application will be ignored. This patch also fixes two issues in the ICMP definer: 1. Parsing an inner ICMP item fetched and overwrote the outer IP_PROTOCOL field, which incorrectly removed the outer L4 match. Fix this by fetching the correct inner field. 2. The member order of mlx5_ifc_header_icmp_bits didn't follow the ICMP packet format. Reorder the members to make them consistent with it. Signed-off-by: Leo Xu --- drivers/net/mlx5/hws/mlx5dr_definer.c | 88 +++ 1 file changed, 88 insertions(+) diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index 6b98eb8c96..8fbc40ff15 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -368,6 +368,47 @@ mlx5dr_definer_icmp6_dw1_set(struct mlx5dr_definer_fc *fc, DR_SET(tag, icmp_dw1, fc->byte_off, fc->bit_off, fc->bit_mask); } +static void +mlx5dr_definer_icmp6_echo_dw1_mask_set(struct mlx5dr_definer_fc *fc, + __rte_unused const void *item_spec, + uint8_t *tag) +{ + const struct rte_flow_item_icmp6 spec = {0xFF, 0xFF, 0x0}; + mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag); +} + +static void +mlx5dr_definer_icmp6_echo_request_dw1_set(struct mlx5dr_definer_fc *fc, + __rte_unused const void *item_spec, + uint8_t *tag) +{ + const struct rte_flow_item_icmp6 spec = {RTE_ICMP6_ECHO_REQUEST, 0, 0}; + mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag); +} + +static void +mlx5dr_definer_icmp6_echo_reply_dw1_set(struct mlx5dr_definer_fc *fc, + __rte_unused const void *item_spec, + uint8_t *tag) +{ + const struct rte_flow_item_icmp6 spec = {RTE_ICMP6_ECHO_REPLY, 0, 0}; + mlx5dr_definer_icmp6_dw1_set(fc, &spec, tag); +} + +static void +mlx5dr_definer_icmp6_echo_dw2_set(struct mlx5dr_definer_fc *fc, + const void *item_spec, + uint8_t *tag) +{ + const struct rte_flow_item_icmp6_echo *v = item_spec; + rte_be32_t dw2; + + dw2 = (rte_be_to_cpu_16(v->echo.identifier) << __mlx5_dw_bit_off(header_icmp, ident)) | + (rte_be_to_cpu_16(v->echo.sequence) << __mlx5_dw_bit_off(header_icmp, seq_nb)); + + DR_SET(tag, dw2, fc->byte_off, fc->bit_off, fc->bit_mask); +} + static void mlx5dr_definer_ipv6_flow_label_set(struct mlx5dr_definer_fc *fc, const void *item_spec, @@ -1441,6 +1482,48 @@ mlx5dr_definer_conv_item_icmp6(struct mlx5dr_definer_conv_data *cd, return 0; } +static int +mlx5dr_definer_conv_item_icmp6_echo(struct mlx5dr_definer_conv_data *cd, + struct rte_flow_item *item, + int item_idx) +{ + const struct rte_flow_item_icmp6_echo *m = item->mask; + struct mlx5dr_definer_fc *fc; + bool inner = cd->tunnel; + + if (!cd->relaxed) { + /* Overwrite match on L4 type ICMP6 */ + fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, inner)]; + fc->item_idx = item_idx; + fc->tag_set = &mlx5dr_definer_icmp_protocol_set; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + DR_CALC_SET(fc, eth_l2, l4_type, inner); + + /* Set fixed type and code for icmp6 echo request/reply */ + fc = &cd->fc[MLX5DR_DEFINER_FNAME_ICMP_DW1]; + fc->item_idx = item_idx; + fc->tag_mask_set = &mlx5dr_definer_icmp6_echo_dw1_mask_set; + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REQUEST) + fc->tag_set = &mlx5dr_definer_icmp6_echo_request_dw1_set; + else /* RTE_FLOW_ITEM_TYPE_ICMP6_ECHO_REPLY */ + fc->tag_set =
&mlx5dr_definer_icmp6_echo_reply_dw1_set; + DR_CALC_SET_HDR(fc, tcp_icmp, icmp_dw1); + } + + if (!m) + return 0; + + /* Set identifier & sequence into icmp_dw2 */ + if (m->echo.identifier || m->echo.sequence) { + fc = &cd->fc[MLX5DR_DEFINER_FNAME_ICMP_DW2]; + fc->item_idx = item_idx; + fc->tag_set = &mlx5dr_definer_icmp6_echo_dw2_set; + DR_CALC_SET_HDR(fc, tcp_icmp, icmp_dw2); + } + + return 0; +} + static int mlx5dr_definer_conv_item_meter_color(struct mlx5dr_definer_conv_data *cd, struct rte_flow_item *item, @@ -1577,6 +1660,11
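To summarize the tag layout the definer code above produces, a small informational sketch (the bit offsets are inferred from the __mlx5_dw_bit_off() usage, so treat them as assumptions): icmp_dw1 carries type(8) | code(8) | checksum(16), fixed per echo REQUEST/REPLY, while icmp_dw2 carries identifier(16) | sequence(16) taken from the item spec.

#include <stdint.h>
#include <rte_byteorder.h>

static uint32_t
pack_icmp6_echo_dw2(rte_be16_t identifier, rte_be16_t sequence)
{
	/* Mirrors mlx5dr_definer_icmp6_echo_dw2_set(): identifier in the
	 * upper 16 bits, sequence number in the lower 16 bits. */
	return ((uint32_t)rte_be_to_cpu_16(identifier) << 16) |
	       rte_be_to_cpu_16(sequence);
}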