[PATCH v6 0/8] support dumping register names and filtering
The registers can be dumped through the API rte_eth_dev_get_reg_info. However, only register values are exported, which is inconvenient for users to interpret. Therefore, the structure "rte_dev_reg_info" is extended and a new API rte_eth_dev_get_reg_info_ext is added to support exporting the names of the corresponding registers and filtering them by module name. The hns3 driver and the ethdev telemetry commands are updated to use the new capability. Jie Hai (8): ethdev: support reporting register names and filtering ethdev: add telemetry cmd for registers net/hns3: remove some basic address dump net/hns3: fix dump counter of registers net/hns3: remove separators between register modules net/hns3: refactor register dump net/hns3: support reporting names of registers net/hns3: support filtering registers by module names doc/guides/rel_notes/release_24_07.rst | 8 + drivers/net/hns3/hns3_regs.c | 1394 +++- lib/ethdev/ethdev_trace.h | 2 + lib/ethdev/rte_dev_info.h | 11 + lib/ethdev/rte_ethdev.c | 38 + lib/ethdev/rte_ethdev.h | 29 + lib/ethdev/rte_ethdev_telemetry.c | 128 +++ lib/ethdev/version.map | 3 + 8 files changed, 1347 insertions(+), 266 deletions(-) -- 2.33.0
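For readers of the series, a minimal sketch of the intended two-pass usage of the new API follows. It is not part of the series itself; the buffer handling mirrors the telemetry handler in patch 2/8, error handling is trimmed, and a 32-bit register width is assumed:

    #include <stdio.h>
    #include <stdlib.h>
    #include <rte_ethdev.h>

    /* Dump the registers of one module (e.g. "ring") with their names. */
    static int
    dump_named_regs(uint16_t port_id, const char *filter)
    {
        struct rte_dev_reg_info info = { .filter = filter };
        uint32_t i;

        /* First pass: data == NULL, the driver only reports length/width. */
        if (rte_eth_dev_get_reg_info_ext(port_id, &info) != 0)
            return -1;

        info.data = calloc(info.length, info.width);
        info.names = calloc(info.length, sizeof(struct rte_eth_reg_name));
        if (info.data == NULL || info.names == NULL)
            return -1; /* a real caller must free any partial allocation */

        /* Second pass: values and names are filled in. */
        if (rte_eth_dev_get_reg_info_ext(port_id, &info) == 0)
            for (i = 0; i < info.length; i++) /* assumes info.width == 4 */
                printf("%s = 0x%x\n", info.names[i].name,
                       ((uint32_t *)info.data)[i]);

        free(info.data);
        free(info.names);
        return 0;
    }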
[PATCH v6 2/8] ethdev: add telemetry cmd for registers
This patch adds a telemetry command for dumping registers, and supports obtaining the registers of a specified module. On the one hand, the number of registers that can be exported is limited by the number of elements carried by a dict and a container. On the other hand, the length of the string exported by telemetry is limited by MAX_OUTPUT_LEN. Therefore, when the number of registers to be exported exceeds these limits, some information will be lost; a warning is logged in the former case. An example usage is shown below: --> /ethdev/regs,0,ring { "/ethdev/regs": { "register_length": 318, "register_width": 4, "register_offset": "0x0", "version": "0x1140011", "group_0": { "Q0_ring_rx_bd_num": "0x0", "Q0_ring_rx_bd_len": "0x0", ... }, "group_1": { ... }, ... } Signed-off-by: Jie Hai --- lib/ethdev/rte_ethdev_telemetry.c | 128 ++ 1 file changed, 128 insertions(+) diff --git a/lib/ethdev/rte_ethdev_telemetry.c b/lib/ethdev/rte_ethdev_telemetry.c index 6b873e7abe68..1d59c693883e 100644 --- a/lib/ethdev/rte_ethdev_telemetry.c +++ b/lib/ethdev/rte_ethdev_telemetry.c @@ -1395,6 +1395,132 @@ eth_dev_handle_port_tm_node_caps(const char *cmd __rte_unused, return ret; } +static void +eth_dev_add_reg_data(struct rte_tel_data *d, struct rte_dev_reg_info *reg_info, +uint32_t idx) +{ + if (reg_info->width == sizeof(uint32_t)) + rte_tel_data_add_dict_uint_hex(d, reg_info->names[idx].name, + *((uint32_t *)reg_info->data + idx), 0); + else + rte_tel_data_add_dict_uint_hex(d, reg_info->names[idx].name, + *((uint64_t *)reg_info->data + idx), 0); +} + +static int +eth_dev_store_regs(struct rte_tel_data *d, struct rte_dev_reg_info *reg_info) +{ + struct rte_tel_data *groups[RTE_TEL_MAX_DICT_ENTRIES]; + char group_name[RTE_TEL_MAX_STRING_LEN] = {0}; + struct rte_tel_data *group = NULL; + uint32_t grp_num = 0; + uint32_t i; + int ret; + + rte_tel_data_start_dict(d); + rte_tel_data_add_dict_uint(d, "register_length", reg_info->length); + rte_tel_data_add_dict_uint(d, "register_width", reg_info->width); + rte_tel_data_add_dict_uint_hex(d, "register_offset", reg_info->offset, 0); + rte_tel_data_add_dict_uint_hex(d, "version", reg_info->version, 0); + + for (i = 0; i < reg_info->length; i++) { + if (i % RTE_TEL_MAX_DICT_ENTRIES != 0) { + eth_dev_add_reg_data(group, reg_info, i); + continue; + } + + group = rte_tel_data_alloc(); + if (group == NULL) { + ret = -ENOMEM; + RTE_ETHDEV_LOG_LINE(WARNING, "Not enough memory for group data"); + goto out; + } + groups[grp_num++] = group; + rte_tel_data_start_dict(group); + eth_dev_add_reg_data(group, reg_info, i); + } + + for (i = 0; i < grp_num; i++) { + snprintf(group_name, RTE_TEL_MAX_STRING_LEN, "group_%u", i); + ret = rte_tel_data_add_dict_container(d, group_name, groups[i], 0); + if (ret == -ENOSPC) { + RTE_ETHDEV_LOG_LINE(WARNING, + "Reduce register number to be displayed from %u to %u due to limited capacity of telemetry", + reg_info->length, i * RTE_TEL_MAX_DICT_ENTRIES); + break; + } + } + return 0; +out: + for (i = 0; i < grp_num; i++) + rte_tel_data_free(groups[i]); + + return ret; +} + +static int +eth_dev_get_port_regs(int port_id, struct rte_tel_data *d, char *filter) +{ + struct rte_dev_reg_info reg_info; + int ret; + + memset(&reg_info, 0, sizeof(reg_info)); + reg_info.filter = filter; + + ret = rte_eth_dev_get_reg_info_ext(port_id, &reg_info); + if (ret != 0) { + RTE_ETHDEV_LOG_LINE(ERR, "Error getting device reg info: %d", ret); + return ret; + } + + reg_info.data = calloc(reg_info.length, reg_info.width); + if (reg_info.data == NULL) { + RTE_ETHDEV_LOG_LINE(ERR, "Failed to allocate memory for reg_info.data"); + return -ENOMEM; + } + + reg_info.names = calloc(reg_info.length, sizeof(struct rte_eth_reg_name)); + if (reg_info.names == NULL) { + RTE_ETHDEV_LOG_LINE(ERR, "Failed to allocate memory for reg_info.names"); + free(reg_info.data); + return -ENOMEM; + } + + ret = rte_eth_dev_get_reg_info_ext(port_id, &reg_info); + if (ret != 0) { + RTE_ETHDEV_LOG_LINE(ERR, "Error getting regs from device: %d", ret); + ret = -EINVAL; + goto out; + } + + ret = eth_dev_store_regs(d, &reg_info);
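To make the grouping in eth_dev_store_regs() concrete: assuming RTE_TEL_MAX_DICT_ENTRIES is 256 (its value is not shown in this patch), the 318 ring registers from the example output above are split as follows:

    /* Assumption: RTE_TEL_MAX_DICT_ENTRIES == 256 (not shown in the patch).
     * length = 318 registers:
     *   group_0: registers   0..255  (256 entries, one full dict)
     *   group_1: registers 256..317  ( 62 entries, the remainder)
     * Each group becomes one nested dict container in the telemetry output. */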
[PATCH v6 3/8] net/hns3: remove some basic address dump
For security reasons, some address registers are not suitable to be exposed; remove them. Cc: sta...@dpdk.org Signed-off-by: Jie Hai Acked-by: Huisong Li --- drivers/net/hns3/hns3_regs.c | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index be1be6a89c94..53d829a4fc68 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -17,13 +17,9 @@ static int hns3_get_dfx_reg_line(struct hns3_hw *hw, uint32_t *lines); -static const uint32_t cmdq_reg_addrs[] = {HNS3_CMDQ_TX_ADDR_L_REG, - HNS3_CMDQ_TX_ADDR_H_REG, - HNS3_CMDQ_TX_DEPTH_REG, +static const uint32_t cmdq_reg_addrs[] = {HNS3_CMDQ_TX_DEPTH_REG, HNS3_CMDQ_TX_TAIL_REG, HNS3_CMDQ_TX_HEAD_REG, - HNS3_CMDQ_RX_ADDR_L_REG, - HNS3_CMDQ_RX_ADDR_H_REG, HNS3_CMDQ_RX_DEPTH_REG, HNS3_CMDQ_RX_TAIL_REG, HNS3_CMDQ_RX_HEAD_REG, @@ -44,9 +40,7 @@ static const uint32_t common_vf_reg_addrs[] = {HNS3_MISC_VECTOR_REG_BASE, HNS3_FUN_RST_ING, HNS3_GRO_EN_REG}; -static const uint32_t ring_reg_addrs[] = {HNS3_RING_RX_BASEADDR_L_REG, - HNS3_RING_RX_BASEADDR_H_REG, - HNS3_RING_RX_BD_NUM_REG, +static const uint32_t ring_reg_addrs[] = {HNS3_RING_RX_BD_NUM_REG, HNS3_RING_RX_BD_LEN_REG, HNS3_RING_RX_EN_REG, HNS3_RING_RX_MERGE_EN_REG, @@ -57,8 +51,6 @@ static const uint32_t ring_reg_addrs[] = {HNS3_RING_RX_BASEADDR_L_REG, HNS3_RING_RX_FBD_OFFSET_REG, HNS3_RING_RX_STASH_REG, HNS3_RING_RX_BD_ERR_REG, - HNS3_RING_TX_BASEADDR_L_REG, - HNS3_RING_TX_BASEADDR_H_REG, HNS3_RING_TX_BD_NUM_REG, HNS3_RING_TX_EN_REG, HNS3_RING_TX_PRIORITY_REG, -- 2.33.0
[PATCH v6 4/8] net/hns3: fix dump counter of registers
Since the driver dumps the queue interrupt registers according to intr_tqps_num, the register counter should use the same value. Fixes: acb3260fac5c ("net/hns3: fix dump register out of range") Fixes: 936eda25e8da ("net/hns3: support dump register") Cc: sta...@dpdk.org Signed-off-by: Jie Hai Acked-by: Huisong Li Acked-by: Chengwen Feng --- drivers/net/hns3/hns3_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index 53d829a4fc68..d9c546470dbe 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -127,7 +127,7 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) tqp_intr_lines = sizeof(tqp_intr_reg_addrs) / REG_LEN_PER_LINE + 1; len = (cmdq_lines + common_lines + ring_lines * hw->tqps_num + - tqp_intr_lines * hw->num_msi) * REG_NUM_PER_LINE; + tqp_intr_lines * hw->intr_tqps_num) * REG_NUM_PER_LINE; if (!hns->is_vf) { ret = hns3_get_regs_num(hw, &regs_num_32_bit, &regs_num_64_bit); -- 2.33.0
[PATCH v6 5/8] net/hns3: remove separators between register modules
Since the driver is going to support reporting the names of all registers, remove the counting and insertion of separators between different register modules. Signed-off-by: Jie Hai Reviewed-by: Huisong Li Acked-by: Chengwen Feng --- drivers/net/hns3/hns3_regs.c | 68 ++-- 1 file changed, 18 insertions(+), 50 deletions(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index d9c546470dbe..c8e3fb118e4b 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -10,12 +10,9 @@ #include "hns3_rxtx.h" #include "hns3_regs.h" -#define MAX_SEPARATE_NUM 4 -#define SEPARATOR_VALUE 0xFFFFFFFF -#define REG_NUM_PER_LINE 4 -#define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(uint32_t)) +#define HNS3_64_BIT_REG_OUTPUT_SIZE (sizeof(uint64_t) / sizeof(uint32_t)) -static int hns3_get_dfx_reg_line(struct hns3_hw *hw, uint32_t *lines); +static int hns3_get_dfx_reg_cnt(struct hns3_hw *hw, uint32_t *count); static const uint32_t cmdq_reg_addrs[] = {HNS3_CMDQ_TX_DEPTH_REG, HNS3_CMDQ_TX_TAIL_REG, @@ -111,23 +108,21 @@ static int hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) { struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); - uint32_t cmdq_lines, common_lines, ring_lines, tqp_intr_lines; uint32_t regs_num_32_bit, regs_num_64_bit; - uint32_t dfx_reg_lines; + uint32_t dfx_reg_cnt; + uint32_t common_cnt; uint32_t len; int ret; - cmdq_lines = sizeof(cmdq_reg_addrs) / REG_LEN_PER_LINE + 1; if (hns->is_vf) - common_lines = - sizeof(common_vf_reg_addrs) / REG_LEN_PER_LINE + 1; + common_cnt = sizeof(common_vf_reg_addrs); else - common_lines = sizeof(common_reg_addrs) / REG_LEN_PER_LINE + 1; - ring_lines = sizeof(ring_reg_addrs) / REG_LEN_PER_LINE + 1; - tqp_intr_lines = sizeof(tqp_intr_reg_addrs) / REG_LEN_PER_LINE + 1; + common_cnt = sizeof(common_reg_addrs); - len = (cmdq_lines + common_lines + ring_lines * hw->tqps_num + - tqp_intr_lines * hw->intr_tqps_num) * REG_NUM_PER_LINE; + len = sizeof(cmdq_reg_addrs) + common_cnt + + sizeof(ring_reg_addrs) * hw->tqps_num + + sizeof(tqp_intr_reg_addrs) * hw->intr_tqps_num; + len /= sizeof(uint32_t); if (!hns->is_vf) { ret = hns3_get_regs_num(hw, &regs_num_32_bit, &regs_num_64_bit); @@ -136,18 +131,16 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) "ret = %d.", ret); return ret; } - dfx_reg_lines = regs_num_32_bit * sizeof(uint32_t) / - REG_LEN_PER_LINE + 1; - dfx_reg_lines += regs_num_64_bit * sizeof(uint64_t) / - REG_LEN_PER_LINE + 1; + dfx_reg_cnt = regs_num_32_bit + + regs_num_64_bit * HNS3_64_BIT_REG_OUTPUT_SIZE; - ret = hns3_get_dfx_reg_line(hw, &dfx_reg_lines); + ret = hns3_get_dfx_reg_cnt(hw, &dfx_reg_cnt); if (ret) { hns3_err(hw, "fail to get the number of dfx registers, " "ret = %d.", ret); return ret; } - len += dfx_reg_lines * REG_NUM_PER_LINE; + len += dfx_reg_cnt; } *length = len; @@ -268,18 +261,6 @@ hns3_get_64_bit_regs(struct hns3_hw *hw, uint32_t regs_num, void *data) return 0; } -static int -hns3_insert_reg_separator(int reg_num, uint32_t *data) -{ - int separator_num; - int i; - - separator_num = MAX_SEPARATE_NUM - reg_num % REG_NUM_PER_LINE; - for (i = 0; i < separator_num; i++) - *data++ = SEPARATOR_VALUE; - return separator_num; -} - static int hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data) { @@ -294,7 +275,6 @@ hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data) reg_num = sizeof(cmdq_reg_addrs) / sizeof(uint32_t); for (i = 0; i < reg_num; i++) *data++ = hns3_read_dev(hw, cmdq_reg_addrs[i]); - data += hns3_insert_reg_separator(reg_num, data); if (hns->is_vf) reg_num =
sizeof(common_vf_reg_addrs) / sizeof(uint32_t); @@ -305,7 +285,6 @@ hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data) *data++ = hns3_read_dev(hw, common_vf_reg_addrs[i]); else *data++ = hns3_read_dev(hw, common_reg_addrs[i]); - data += hns3_insert_reg_separator(reg_num, data); reg_num = sizeof(ring_reg_addrs) / sizeof(uint32_t); for (j = 0; j < hw->tqps_num; j++) { @@ -313,7 +292,6 @@ hns3_direct_access_regs(struct hns3_hw *hw, uint32
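As a sanity check on the new length computation, here is a worked example with the cmdq block, which holds 10 registers (40 bytes) after patch 3/8:

    /* Old, line-based scheme (4 registers per 16-byte line, plus padding):
     *   cmdq_lines = 40 / REG_LEN_PER_LINE + 1 = 40 / 16 + 1 = 3
     *   entries    = cmdq_lines * REG_NUM_PER_LINE = 12
     *                (10 register values + 2 separator words)
     * New scheme, exact count with no separators:
     *   entries    = sizeof(cmdq_reg_addrs) / sizeof(uint32_t) = 10
     */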
[PATCH v6 1/8] ethdev: support reporting register names and filtering
This patch adds "filter" and "names" fields to the "rte_dev_reg_info" structure. Names of registers in data fields can be reported and the registers can be filtered by their module names. The new API rte_eth_dev_get_reg_info_ext() is added to support reporting names and filtering by modules. The original API rte_eth_dev_get_reg_info() does not use the names and filter fields; a local variable is used in rte_eth_dev_get_reg_info() for compatibility. If the driver does not report the names, they are set to "index_XXX", which indicates the register's location in the register table. Signed-off-by: Jie Hai Acked-by: Huisong Li Acked-by: Chengwen Feng --- doc/guides/rel_notes/release_24_07.rst | 8 ++ lib/ethdev/ethdev_trace.h | 2 ++ lib/ethdev/rte_dev_info.h | 11 lib/ethdev/rte_ethdev.c | 38 ++ lib/ethdev/rte_ethdev.h | 29 lib/ethdev/version.map | 3 ++ 6 files changed, 91 insertions(+) diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst index 058609b0f36b..b0bb49c8f29e 100644 --- a/doc/guides/rel_notes/release_24_07.rst +++ b/doc/guides/rel_notes/release_24_07.rst @@ -186,6 +186,12 @@ New Features * Added defer queue reclamation via RCU. * Added SVE support for bulk lookup. +* **Added support for dumping registers with names and filtering by modules.** + + * Added a new API function ``rte_eth_dev_get_reg_info_ext()`` to filter the +registers by module names and get the information (names, values and other +attributes) of the filtered registers. + Removed Items - @@ -241,6 +247,8 @@ ABI Changes This section is a comment. Do not overwrite or remove it. Also, make sure to start the actual text at the margin. === + * ethdev: Added ``filter`` and ``names`` fields to ``rte_dev_reg_info`` + structure for filtering by modules and reporting names of registers. * No ABI change that would break compatibility with 23.11. diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h index 3bec87bfdb70..0c4780a09ef5 100644 --- a/lib/ethdev/ethdev_trace.h +++ b/lib/ethdev/ethdev_trace.h @@ -1152,6 +1152,8 @@ RTE_TRACE_POINT( rte_trace_point_emit_u32(info->length); rte_trace_point_emit_u32(info->width); rte_trace_point_emit_u32(info->version); + rte_trace_point_emit_ptr(info->names); + rte_trace_point_emit_ptr(info->filter); rte_trace_point_emit_int(ret); ) diff --git a/lib/ethdev/rte_dev_info.h b/lib/ethdev/rte_dev_info.h index 67cf0ae52668..26b777f9836e 100644 --- a/lib/ethdev/rte_dev_info.h +++ b/lib/ethdev/rte_dev_info.h @@ -11,6 +11,11 @@ extern "C" { #include +#define RTE_ETH_REG_NAME_SIZE 64 +struct rte_eth_reg_name { + char name[RTE_ETH_REG_NAME_SIZE]; +}; + /* * Placeholder for accessing device registers */ @@ -20,6 +25,12 @@ struct rte_dev_reg_info { uint32_t length; /**< Number of registers to fetch */ uint32_t width; /**< Size of device register */ uint32_t version; /**< Device version */ + /** +* Name of target module, filter for target subset of registers. +* This field affects the register selection for data/length/names. +*/ + const char *filter; + struct rte_eth_reg_name *names; /**< Array to store the register names */ }; /* diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index f1c658f49e80..30ca4a0043c5 100644 --- a/lib/ethdev/rte_ethdev.c +++ b/lib/ethdev/rte_ethdev.c @@ -6388,8 +6388,37 @@ rte_eth_read_clock(uint16_t port_id, uint64_t *clock) int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) +{ + struct rte_dev_reg_info reg_info = { 0 }; + int ret; + + if (info == NULL) { + RTE_ETHDEV_LOG_LINE(ERR, + "Cannot get ethdev port %u register info to NULL", + port_id); + return -EINVAL; + } + + reg_info.length = info->length; + reg_info.data = info->data; + + ret = rte_eth_dev_get_reg_info_ext(port_id, &reg_info); + if (ret != 0) + return ret; + + info->length = reg_info.length; + info->width = reg_info.width; + info->version = reg_info.version; + info->offset = reg_info.offset; + + return 0; +} + +int +rte_eth_dev_get_reg_info_ext(uint16_t port_id, struct rte_dev_reg_info *info) { struct rte_eth_dev *dev; + uint32_t i; int ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -6402,12 +6431,21 @@ rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) return -EINVAL; } + if (info->names != NULL && info->length != 0) + memset(info->names, 0, sizeof(struct rte_eth_reg_name) * info->length); + if (*dev->dev
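The "index_XXX" fallback described in the commit message is in a part of the diff not quoted above; a hypothetical sketch of what it does:

    /* Hypothetical sketch (names and exact placement are assumptions):
     * after the driver callback returns, any name it left empty is
     * replaced by the register's position in the table. */
    for (i = 0; i < info->length; i++)
        if (info->names[i].name[0] == '\0')
            snprintf(info->names[i].name, RTE_ETH_REG_NAME_SIZE,
                     "index_%u", i);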
[PATCH v6 6/8] net/hns3: refactor register dump
This patch refactors the code that dumps registers from firmware. Signed-off-by: Jie Hai --- drivers/net/hns3/hns3_regs.c | 203 --- 1 file changed, 115 insertions(+), 88 deletions(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index c8e3fb118e4b..89858c2b1c09 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -104,12 +104,93 @@ hns3_get_regs_num(struct hns3_hw *hw, uint32_t *regs_num_32_bit, return 0; } +static int +hns3_get_32_64_regs_cnt(struct hns3_hw *hw, uint32_t *count) +{ + uint32_t regs_num_32_bit, regs_num_64_bit; + int ret; + + ret = hns3_get_regs_num(hw, &regs_num_32_bit, &regs_num_64_bit); + if (ret) { + hns3_err(hw, "fail to get the number of registers, " +"ret = %d.", ret); + return ret; + } + + *count += regs_num_32_bit + regs_num_64_bit * HNS3_64_BIT_REG_OUTPUT_SIZE; + return 0; +} + +static int +hns3_get_dfx_reg_bd_num(struct hns3_hw *hw, uint32_t *bd_num_list, + uint32_t list_size) +{ +#define HNS3_GET_DFX_REG_BD_NUM_SIZE 4 + struct hns3_cmd_desc desc[HNS3_GET_DFX_REG_BD_NUM_SIZE]; + uint32_t index, desc_index; + uint32_t bd_num; + uint32_t i; + int ret; + + for (i = 0; i < HNS3_GET_DFX_REG_BD_NUM_SIZE - 1; i++) { + hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_DFX_BD_NUM, true); + desc[i].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); + } + /* The last BD does not need a next flag */ + hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_DFX_BD_NUM, true); + + ret = hns3_cmd_send(hw, desc, HNS3_GET_DFX_REG_BD_NUM_SIZE); + if (ret) { + hns3_err(hw, "fail to get dfx bd num, ret = %d.\n", ret); + return ret; + } + + /* The first data in the first BD is a reserved field */ + for (i = 1; i <= list_size; i++) { + desc_index = i / HNS3_CMD_DESC_DATA_NUM; + index = i % HNS3_CMD_DESC_DATA_NUM; + bd_num = rte_le_to_cpu_32(desc[desc_index].data[index]); + bd_num_list[i - 1] = bd_num; + } + + return 0; +} + +static int +hns3_get_dfx_reg_cnt(struct hns3_hw *hw, uint32_t *count) +{ + int opcode_num = RTE_DIM(hns3_dfx_reg_opcode_list); + uint32_t bd_num_list[opcode_num]; + int ret; + int i; + + ret = hns3_get_dfx_reg_bd_num(hw, bd_num_list, opcode_num); + if (ret) + return ret; + + for (i = 0; i < opcode_num; i++) + *count += bd_num_list[i] * HNS3_CMD_DESC_DATA_NUM; + + return 0; +} + +static int +hns3_get_firmware_reg_cnt(struct hns3_hw *hw, uint32_t *count) +{ + int ret; + + ret = hns3_get_32_64_regs_cnt(hw, count); + if (ret < 0) + return ret; + + return hns3_get_dfx_reg_cnt(hw, count); +} + static int hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) { struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); - uint32_t regs_num_32_bit, regs_num_64_bit; - uint32_t dfx_reg_cnt; + uint32_t dfx_reg_cnt = 0; uint32_t common_cnt; uint32_t len; int ret; @@ -125,16 +206,7 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) len /= sizeof(uint32_t); if (!hns->is_vf) { - ret = hns3_get_regs_num(hw, &regs_num_32_bit, &regs_num_64_bit); - if (ret) { - hns3_err(hw, "fail to get the number of registers, " -"ret = %d.", ret); - return ret; - } - dfx_reg_cnt = regs_num_32_bit + - regs_num_64_bit * HNS3_64_BIT_REG_OUTPUT_SIZE; - - ret = hns3_get_dfx_reg_cnt(hw, &dfx_reg_cnt); + ret = hns3_get_firmware_reg_cnt(hw, &dfx_reg_cnt); if (ret) { hns3_err(hw, "fail to get the number of dfx registers, " "ret = %d.", ret); return ret; } @@ -304,41 +376,6 @@ hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data) return data - origin_data_ptr; } -static int -hns3_get_dfx_reg_bd_num(struct hns3_hw *hw, uint32_t *bd_num_list, - uint32_t list_size) -{ -#define
HNS3_GET_DFX_REG_BD_NUM_SIZE 4 - struct hns3_cmd_desc desc[HNS3_GET_DFX_REG_BD_NUM_SIZE]; - uint32_t index, desc_index; - uint32_t bd_num; - uint32_t i; - int ret; - - for (i = 0; i < HNS3_GET_DFX_REG_BD_NUM_SIZE - 1; i++) { - hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_DFX_BD_NUM, true); - desc[i].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); - } - /* The last BD does not need a next flag */ - hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_DFX_BD_NUM, true); - - ret = hns3_cmd_send(hw, desc, HNS3_GET_DFX_REG_BD
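For clarity on the desc_index/index arithmetic in hns3_get_dfx_reg_bd_num(): the BD counts are laid out back-to-back across the command descriptors, skipping the reserved first word. Assuming HNS3_CMD_DESC_DATA_NUM is 6 (its value is not shown in this excerpt):

    /* i = 7 (eighth u32 overall; the loop starts at i = 1 because data
     * word 0 of the first BD is reserved):
     *   desc_index = 7 / 6 = 1   ->  second descriptor
     *   index      = 7 % 6 = 1   ->  second data word of that descriptor
     * so bd_num_list[6] = desc[1].data[1].
     */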
[PATCH v6 8/8] net/hns3: support filtering registers by module names
This patch supports dumping only the registers of the modules whose name contains the `filter` string. The module names are in lower case and so is the `filter`. Available module names are cmdq, common_pf, common_vf, ring, tqp_intr, 32_bit_dfx, 64_bit_dfx, bios, igu_egu, ssu, ppp, rpu, ncsi, rtc, rcb, etc. Signed-off-by: Jie Hai --- drivers/net/hns3/hns3_regs.c | 309 --- 1 file changed, 180 insertions(+), 129 deletions(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index 622d2e1c3d02..265d9b433653 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -12,8 +12,6 @@ #define HNS3_64_BIT_REG_OUTPUT_SIZE (sizeof(uint64_t) / sizeof(uint32_t)) -static int hns3_get_dfx_reg_cnt(struct hns3_hw *hw, uint32_t *count); - struct hns3_dirt_reg_entry { const char *name; uint32_t addr; @@ -795,33 +793,77 @@ enum hns3_reg_modules { HNS3_64_BIT_DFX, }; +#define HNS3_MODULE_MASK(x) RTE_BIT32(x) +#define HNS3_VF_MODULES (HNS3_MODULE_MASK(HNS3_CMDQ) | HNS3_MODULE_MASK(HNS3_COMMON_VF) | \ +HNS3_MODULE_MASK(HNS3_RING) | HNS3_MODULE_MASK(HNS3_TQP_INTR)) +#define HNS3_VF_ONLY_MODULES HNS3_MODULE_MASK(HNS3_COMMON_VF) + struct hns3_reg_list { const void *reg_list; uint32_t entry_num; + const char *module; }; static struct hns3_reg_list hns3_reg_lists[] = { - [HNS3_BIOS_COMMON] = { dfx_bios_common_reg_list, RTE_DIM(dfx_bios_common_reg_list)}, - [HNS3_SSU_0]= { dfx_ssu_reg_0_list, RTE_DIM(dfx_ssu_reg_0_list)}, - [HNS3_SSU_1]= { dfx_ssu_reg_1_list, RTE_DIM(dfx_ssu_reg_1_list)}, - [HNS3_IGU_EGU] = { dfx_igu_egu_reg_list, RTE_DIM(dfx_igu_egu_reg_list)}, - [HNS3_RPU_0]= { dfx_rpu_reg_0_list, RTE_DIM(dfx_rpu_reg_0_list)}, - [HNS3_RPU_1]= { dfx_rpu_reg_1_list, RTE_DIM(dfx_rpu_reg_1_list)}, - [HNS3_NCSI] = { dfx_ncsi_reg_list, RTE_DIM(dfx_ncsi_reg_list)}, - [HNS3_RTC] = { dfx_rtc_reg_list, RTE_DIM(dfx_rtc_reg_list)}, - [HNS3_PPP] = { dfx_ppp_reg_list, RTE_DIM(dfx_ppp_reg_list)}, - [HNS3_RCB] = { dfx_rcb_reg_list, RTE_DIM(dfx_rcb_reg_list)}, - [HNS3_TQP] = { dfx_tqp_reg_list, RTE_DIM(dfx_tqp_reg_list)}, - [HNS3_SSU_2]= { dfx_ssu_reg_2_list, RTE_DIM(dfx_ssu_reg_2_list)}, - - [HNS3_CMDQ] = { cmdq_reg_list, RTE_DIM(cmdq_reg_list)}, - [HNS3_COMMON_PF]= { common_reg_list, RTE_DIM(common_reg_list)}, - [HNS3_COMMON_VF]= { common_vf_reg_list, RTE_DIM(common_vf_reg_list)}, - [HNS3_RING] = { ring_reg_list, RTE_DIM(ring_reg_list)}, - [HNS3_TQP_INTR] = { tqp_intr_reg_list, RTE_DIM(tqp_intr_reg_list)}, - - [HNS3_32_BIT_DFX] = { regs_32_bit_list, RTE_DIM(regs_32_bit_list)}, - [HNS3_64_BIT_DFX] = { regs_64_bit_list, RTE_DIM(regs_64_bit_list)}, + [HNS3_BIOS_COMMON] = { + dfx_bios_common_reg_list, RTE_DIM(dfx_bios_common_reg_list), "bios" + }, + [HNS3_SSU_0] = { + dfx_ssu_reg_0_list, RTE_DIM(dfx_ssu_reg_0_list), "ssu" + }, + [HNS3_SSU_1] = { + dfx_ssu_reg_1_list, RTE_DIM(dfx_ssu_reg_1_list), "ssu" + }, + [HNS3_IGU_EGU] = { + dfx_igu_egu_reg_list, RTE_DIM(dfx_igu_egu_reg_list), "igu_egu" + }, + [HNS3_RPU_0] = { + dfx_rpu_reg_0_list, RTE_DIM(dfx_rpu_reg_0_list), "rpu" + }, + [HNS3_RPU_1] = { + dfx_rpu_reg_1_list, RTE_DIM(dfx_rpu_reg_1_list), "rpu" + }, + [HNS3_NCSI] = { + dfx_ncsi_reg_list, RTE_DIM(dfx_ncsi_reg_list), "ncsi" + }, + [HNS3_RTC] = { + dfx_rtc_reg_list, RTE_DIM(dfx_rtc_reg_list), "rtc" + }, + [HNS3_PPP] = { + dfx_ppp_reg_list, RTE_DIM(dfx_ppp_reg_list), "ppp" + }, + [HNS3_RCB] = { + dfx_rcb_reg_list, RTE_DIM(dfx_rcb_reg_list), "rcb" + }, + [HNS3_TQP] = { + dfx_tqp_reg_list, RTE_DIM(dfx_tqp_reg_list), "tqp" + }, + [HNS3_SSU_2] = { + dfx_ssu_reg_2_list, RTE_DIM(dfx_ssu_reg_2_list), "ssu" + }, + +
[HNS3_CMDQ] = { + cmdq_reg_list, RTE_DIM(cmdq_reg_list), "cmdq" + }, + [HNS3_COMMON_PF] = { + common_reg_list,RTE_DIM(common_reg_list),
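A minimal sketch of the substring matching implied by the commit message (the actual matching helper falls outside the quoted part of the diff, so this is an assumption):

    #include <stdbool.h>
    #include <string.h>

    /* Hypothetical helper: a module is selected when no filter is given,
     * or when its lower-case module name contains the filter substring. */
    static bool
    hns3_module_matches(const char *module, const char *filter)
    {
        return filter == NULL || strstr(module, filter) != NULL;
    }

For example, filter "ssu" would select HNS3_SSU_0, HNS3_SSU_1 and HNS3_SSU_2, while "common" would select both "common_pf" and "common_vf".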
[PATCH v6 7/8] net/hns3: support reporting names of registers
This patch adds names to the register lists and supports reporting the names of registers. Some registers have different names on different platforms; the names from HIP08 are used as the default names. Signed-off-by: Jie Hai --- drivers/net/hns3/hns3_regs.c | 1090 +- 1 file changed, 957 insertions(+), 133 deletions(-) diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c index 89858c2b1c09..622d2e1c3d02 100644 --- a/drivers/net/hns3/hns3_regs.c +++ b/drivers/net/hns3/hns3_regs.c @@ -14,73 +14,829 @@ static int hns3_get_dfx_reg_cnt(struct hns3_hw *hw, uint32_t *count); -static const uint32_t cmdq_reg_addrs[] = {HNS3_CMDQ_TX_DEPTH_REG, - HNS3_CMDQ_TX_TAIL_REG, - HNS3_CMDQ_TX_HEAD_REG, - HNS3_CMDQ_RX_DEPTH_REG, - HNS3_CMDQ_RX_TAIL_REG, - HNS3_CMDQ_RX_HEAD_REG, - HNS3_VECTOR0_CMDQ_SRC_REG, - HNS3_CMDQ_INTR_STS_REG, - HNS3_CMDQ_INTR_EN_REG, - HNS3_CMDQ_INTR_GEN_REG}; - -static const uint32_t common_reg_addrs[] = {HNS3_MISC_VECTOR_REG_BASE, - HNS3_VECTOR0_OTER_EN_REG, - HNS3_MISC_RESET_STS_REG, - HNS3_VECTOR0_OTHER_INT_STS_REG, - HNS3_GLOBAL_RESET_REG, - HNS3_FUN_RST_ING, - HNS3_GRO_EN_REG}; - -static const uint32_t common_vf_reg_addrs[] = {HNS3_MISC_VECTOR_REG_BASE, - HNS3_FUN_RST_ING, - HNS3_GRO_EN_REG}; - -static const uint32_t ring_reg_addrs[] = {HNS3_RING_RX_BD_NUM_REG, - HNS3_RING_RX_BD_LEN_REG, - HNS3_RING_RX_EN_REG, - HNS3_RING_RX_MERGE_EN_REG, - HNS3_RING_RX_TAIL_REG, - HNS3_RING_RX_HEAD_REG, - HNS3_RING_RX_FBDNUM_REG, - HNS3_RING_RX_OFFSET_REG, - HNS3_RING_RX_FBD_OFFSET_REG, - HNS3_RING_RX_STASH_REG, - HNS3_RING_RX_BD_ERR_REG, - HNS3_RING_TX_BD_NUM_REG, - HNS3_RING_TX_EN_REG, - HNS3_RING_TX_PRIORITY_REG, - HNS3_RING_TX_TC_REG, - HNS3_RING_TX_MERGE_EN_REG, - HNS3_RING_TX_TAIL_REG, - HNS3_RING_TX_HEAD_REG, - HNS3_RING_TX_FBDNUM_REG, - HNS3_RING_TX_OFFSET_REG, - HNS3_RING_TX_EBD_NUM_REG, - HNS3_RING_TX_EBD_OFFSET_REG, - HNS3_RING_TX_BD_ERR_REG, - HNS3_RING_EN_REG}; - -static const uint32_t tqp_intr_reg_addrs[] = {HNS3_TQP_INTR_CTRL_REG, - HNS3_TQP_INTR_GL0_REG, - HNS3_TQP_INTR_GL1_REG, - HNS3_TQP_INTR_GL2_REG, - HNS3_TQP_INTR_RL_REG}; +struct hns3_dirt_reg_entry { + const char *name; + uint32_t addr; +}; + +static const struct hns3_dirt_reg_entry cmdq_reg_list[] = { + {"cmdq_tx_depth", HNS3_CMDQ_TX_DEPTH_REG}, + {"cmdq_tx_tail",HNS3_CMDQ_TX_TAIL_REG}, + {"cmdq_tx_head",HNS3_CMDQ_TX_HEAD_REG}, + {"cmdq_rx_depth", HNS3_CMDQ_RX_DEPTH_REG}, + {"cmdq_rx_tail",HNS3_CMDQ_RX_TAIL_REG}, + {"cmdq_rx_head",HNS3_CMDQ_RX_HEAD_REG}, + {"vector0_cmdq_src",HNS3_VECTOR0_CMDQ_SRC_REG}, + {"cmdq_intr_sts", HNS3_CMDQ_INTR_STS_REG}, + {"cmdq_intr_en",HNS3_CMDQ_INTR_EN_REG}, + {"cmdq_intr_gen", HNS3_CMDQ_INTR_GEN_REG}, +}; + +static const struct hns3_dirt_reg_entry common_reg_list[] = { + {"misc_vector_reg_base",HNS3_MISC_VECTOR_REG_BASE}, + {"vector0_oter_en", HNS3_VECTOR0_OTER_EN_REG}, + {"misc_reset_sts", HNS3_MISC_RESET_STS_REG}, + {"vector0_other_int_st
RE: [V1] app/testpmd: restore VXLAN-GPE support
> -----Original Message----- > From: Ferruh Yigit > Sent: Saturday, July 20, 2024 4:25 AM > To: Minggang(Gavin) Li ; Matan Azrad ; > Slava Ovsiienko ; Ori Kam ; NBU- > Contact-Thomas Monjalon (EXTERNAL) ; Aman Singh > > Cc: dev@dpdk.org; Raslan Darawsheh > Subject: Re: [V1] app/testpmd: restore VXLAN-GPE support > > On 7/17/2024 8:11 AM, Gavin Li wrote: > > VXLAN-GPE support was removed from testpmd recently. Drivers which are > > not migrated are still using VXLAN-GPE in tests. > > > > This commit is to restore the support for VXLAN-GPE in testpmd. > > > > Fixes: da118115d95c ("app/testpmd: support matching any VXLAN field") > > Signed-off-by: Gavin Li > > > > Hi Gavin, > > The original patch was from you, right? What went wrong? The removal of VXLAN-GPE from testpmd was too aggressive, since drivers that have not been migrated are still using VXLAN-GPE. It's better to keep it until the day the VXLAN-GPE RTE item is removed from DPDK.
[PATCH v3 1/3] net/ice: fix possible memory leak
This patch fixes possible memory leak inside the ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() for previously allocated pkt_buf and msk_buf. Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Reported-by: Michael Theodore Stolarchuk Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 21 + 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index f923641533..cdce1d0ea2 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -650,7 +650,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, uint8_t *pkt_buf, *msk_buf; uint8_t tmp_val = 0; uint8_t tmp_c = 0; - int i, j; + int i, j, ret = 0; if (ad->psr == NULL) return -rte_errno; @@ -670,8 +670,10 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, return -ENOMEM; msk_buf = rte_zmalloc(NULL, pkt_len, 0); - if (!msk_buf) + if (!msk_buf) { + rte_free(pkt_buf); return -ENOMEM; + } /* convert string to int array */ for (i = 0, j = 0; i < spec_len; i += 2, j++) { @@ -708,18 +710,21 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, msk_buf[j] = tmp_val * 16 + tmp_c - '0'; } - if (ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt)) - return -rte_errno; + ret = ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt); + if (ret) + goto free_mem; - if (ice_parser_profile_init(&rslt, pkt_buf, msk_buf, - pkt_len, ICE_BLK_RSS, true, &prof)) - return -rte_errno; + ret = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, + pkt_len, ICE_BLK_RSS, true, &prof); + goto free_mem; rte_memcpy(&meta->raw.prof, &prof, sizeof(prof)); +free_mem: rte_free(pkt_buf); rte_free(msk_buf); - return 0; + + return ret; } static void -- 2.34.1
[PATCH v3 2/3] net/ice: refactor raw pattern parsing function
Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index cdce1d0ea2..d63e673b25 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -658,9 +658,9 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, raw_spec = item->spec; raw_mask = item->mask; - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); - if (strlen((char *)(uintptr_t)raw_mask->pattern) != - spec_len) + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, raw_spec->length); + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length) != + spec_len) return -rte_errno; pkt_len = spec_len / 2; -- 2.34.1
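The rationale for the change: the raw item's pattern buffer is supplied by the application and is not guaranteed to be NUL-terminated, so the scan must be bounded by the declared length. A contrived illustration:

    #include <string.h>

    static size_t
    bounded_len(void)
    {
        const char pattern[4] = { 'a', 'b', 'c', 'd' }; /* no terminating NUL */

        /* strlen(pattern) would read past the end of the buffer (undefined
         * behaviour); strnlen() stops at the bound and returns 4. */
        return strnlen(pattern, sizeof(pattern));
    }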
[PATCH v3 3/3] net/ice: fix return value for raw pattern parsing function
If the parser was not initialized when calling ice_hash_parse_raw_pattern(), -rte_errno was returned. Replace returning rte_errno with ENOTSUP, since rte_errno is meaningless in the context of ice_hash_parse_raw_pattern(). Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index d63e673b25..b040a198bb 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -653,7 +653,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, int i, j, ret = 0; if (ad->psr == NULL) - return -rte_errno; + return -ENOTSUP; raw_spec = item->spec; raw_mask = item->mask; -- 2.34.1
RE: IPv6 APIs rework
> Vladimir Medvedkin, Jul 18, 2024 at 23:25: > > I think alignment should be 1 since in FIB6 users usually don't copy IPv6 > > address and just provide a pointer to the memory inside the packet. Current > > vector implementation loads IPv6 addresses using unaligned access ( > > _mm512_loadu_si512) so it doesn't rely on alignment. > > Yes, my intention was exactly that, being able to map that structure > directly in packets without copying them on the stack. > > > > 2. In the IPv6 packet header, the IPv6 addresses are not 16 byte aligned, > > > they are 8 byte aligned. So we cannot make the IPv6 address type 16 byte > > > aligned. > > > Not necessarily, if the Ethernet frame in the mbuf starts on an 8b-aligned address, then > > IPv6 is aligned only by 2 bytes. > > We probably could safely say that aligning on 2 bytes would be OK. But > is there any benefit, performance wise, in doing so? Keeping the same > alignment as before the change would at least make it ABI compatible. I am also not sure that this extra alignment (2B or 4B) will give us any benefit, while it most likely will introduce extra restrictions. AFAIK, right now we have ipv6 as an array of plain chars, and there were not many complaints about it. So I am for keeping it 1B aligned. Overall the proposal looks reasonable to me... maybe 24.11 is a good opportunity for such a change. Konstantin
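For reference, the zero-copy use case being discussed, assuming the 1-byte-aligned struct sketched later in this thread (a sketch of the proposal, not a final API):

    #include <rte_ip.h>

    /* Proposed address type under discussion: alignment 1, no padding. */
    struct rte_ipv6_addr {
        unsigned char a[16];
    };

    /* With alignment 1, the type can alias packet memory directly, e.g.
     * to build a FIB6 lookup key without copying: */
    static const struct rte_ipv6_addr *
    dst_of(const struct rte_ipv6_hdr *ip6)
    {
        return (const struct rte_ipv6_addr *)ip6->dst_addr;
    }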
Re: FDIR packet distribution with specific multiple RX queues.
Hi Stephen, Thanks for your response. As our application has limitations while using RSS, I would prefer an approach similar to RTE_FLOW_ACTION_TYPE_QUEUE. Since flow director supports only one RXQ index, I have not been able to achieve the desired outcome. Please suggest any approach like RTE_FLOW_ACTION_TYPE_QUEUE, but not RSS, that could match my requirement. Thanks, Raghavan V From: Stephen Hemminger Sent: Thursday, July 18, 2024 9:06:14 pm To: Raghavan V Cc: dev@dpdk.org ; us...@dpdk.org Subject: Re: FDIR packet distribution with specific multiple RX queues. On Thu, 18 Jul 2024 11:36:43 + Raghavan V wrote: > Is there any way to distribute packets evenly (like RSS) to specific multiple > RX queues in RTE_FLOW_ACTION_TYPE_QUEUE DPDK Flow director? > > Desired action: > > uint16_t queue_indices[] = {10, 11, 12, 13, 14, 15}; > struct rte_flow_action_queue queue = {.index = queue_indices}; > struct rte_flow_action action[]={ > [0]={.type = RTE_FLOW_ACTION_TYPE_QUEUE,.conf = &queue}, > [1]={.type = RTE_FLOW_ACTION_TYPE_END} > }; You want RTE_FLOW_ACTION_TYPE_RSS uint16_t queue_indices[] = {10, 11, 12, 13, 14, 15}; struct rte_flow_action_rss rss = { .types = RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP, .queue_num = RTE_DIM(queue_indices), .queue = queue_indices, }; struct rte_flow_action action[]={ [0]={.type = RTE_FLOW_ACTION_TYPE_RSS,.conf = &rss}, [1]={.type = RTE_FLOW_ACTION_TYPE_END} }; > Is this action limited to specific drivers? Yes, drivers implement only what hardware can support.
RE: IPv6 APIs rework
> From: Robin Jarry [mailto:rja...@redhat.com] > Sent: Sunday, 21 July 2024 23.51 > > Hi Morten, Stephen, > > Morten Brørup, Jul 21, 2024 at 18:12: > > If the IPv6 address type you tested with was a struct containing > > a union of different types (other than an array of 16 bytes), then > > those sub-types made your IPv6 address type non-byte aligned, and > > caused padding when used in other structures. > > > > Please try again with the simple array type: > > struct rte_ipv6_addr { unsigned char addr_bytes[16]; }; > > > > This should not cause any padding when used in other structures, > > except if used with alignas(). > > Indeed removing the sub-types in the union removes the need for strict > alignment and packing. > > Too bad, I found these intermediate integers made the code a bit nicer > but I can understand that it brings a lot of trouble down the line. Maybe some magical macros (or inline functions) can be used for pretty casting to larger integer types, using alignof() and/or the GCC assume_aligned attribute. Such macros/functions can be added in later patches. Perhaps they might even be generic, so they could be used on other byte array types too. > > NB: I tried uint8_t vs unsigned char, it makes no difference with > implicit casting to (uint16_t *) or (uint32_t *). Explicit casting is > required anyway. Unfortunately, I still cannot recall why unsigned char is better for type casting than uint8_t, so I cannot support my statement with a trustworthy source of reference. > > > If you are introducing an official IPv6 address type into DPDK, its > > scope it not just the FIB6 API. > > > > Both Stephen and I can see that - in a broader perspective - the > > packed and unaligned constraints are unacceptable for performance. > > > > It might not be a problem for the current FIB6 implementation, but it > > *will* be a problem in many other places, if converted to using the > > new IPv6 address type. > > > > PS: > > I do consider adding a dedicated IPv6 address type to DPDK an > > improvement over the current convention of using an uint8_t[16] array. > > But we need to agree on the type, which must work optimally for > > a broad spectrum of use cases. Otherwise, the new type is not an > > improvement, but a deterioration of DPDK. > > OK, I understand the stakes. I will comply and propose a simple struct > without any packing nor explicit alignment. > > struct rte_ipv6_addr { > union { > unsigned char a[RTE_IPV6_ADDR_SIZE]; > }; > }; > > I have left the door open in order to ease adding sub-types in the > future. Indeed, lpm6/fib6 tests rely on literal definitions of IPv6 > addresses and union types need an extra set of curly braces for literal > definitions. If you think we will never need to add sub-types, I can get > rid of this. It makes no difference at runtime. I think it is safe to start without the union. If the anonymous union only has one member, it makes no difference if the union is there or not. So, if we add other sub-types in the future, the union can be added at that time. NB: I used "addr_bytes" as the name of the array in the structure, as in the rte_ether_addr structure [1]; but I support using "a" instead, it is shorter and it seems obvious that it is the same. 
[1]: https://elixir.bootlin.com/dpdk/v24.07-rc2/source/lib/net/rte_ether.h#L74 Perhaps we could add an anonymous union to rte_ether_addr, to shorten its access name similarly: struct __rte_aligned(2) rte_ether_addr { + __extension__ + union { uint8_t addr_bytes[RTE_ETHER_ADDR_LEN]; /**< Addr bytes in tx order */ + unsigned char a[RTE_ETHER_ADDR_LEN]; /**< Same, but shorter name */ + } }; This is not related to your patch in any way. Just thinking out loud. > > About the timing: when should I send a patch to announce IPv6 API > breakage for 24.11? ASAP, I guess. I suggest you describe it as an introduction of an IPv6 address type, and list the APIs that will be updated to use this new type. The intention of introducing the new IPv6 address type with a broader scope than just the FIB6 APIs is to inspire others to use the new IPv6 address type too. > > Thanks for taking the time. > Cheers. Thank you for listening.
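On the extra curly braces mentioned above for union literals, a quick hypothetical illustration (the type names are invented for the example):

    /* Plain struct: braces for the struct, then for the array. */
    struct addr_plain { unsigned char a[16]; };
    static const struct addr_plain p = { { 0x20, 0x01 } };

    /* With a wrapping anonymous union (C11), positional literals need
     * one more level of braces: struct { union { array } }. */
    struct addr_union { union { unsigned char a[16]; }; };
    static const struct addr_union u = { { { 0x20, 0x01 } } };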
Re: [V1] app/testpmd: restore VXLAN-GPE support
On 7/22/2024 8:10 AM, Minggang(Gavin) Li wrote: >> -----Original Message----- >> From: Ferruh Yigit >> Sent: Saturday, July 20, 2024 4:25 AM >> To: Minggang(Gavin) Li ; Matan Azrad ; >> Slava Ovsiienko ; Ori Kam ; NBU- >> Contact-Thomas Monjalon (EXTERNAL) ; Aman Singh >> >> Cc: dev@dpdk.org; Raslan Darawsheh >> Subject: Re: [V1] app/testpmd: restore VXLAN-GPE support >> >> On 7/17/2024 8:11 AM, Gavin Li wrote: >>> VXLAN-GPE support was removed from testpmd recently. Drivers which are >>> not migrated are still using VXLAN-GPE in tests. >>> >>> This commit is to restore the support for VXLAN-GPE in testpmd. >>> >>> Fixes: da118115d95c ("app/testpmd: support matching any VXLAN field") >>> Signed-off-by: Gavin Li >>> >> >> Hi Gavin, >> >> The original patch was from you, right? What went wrong? > The removal of VXLAN-GPE from testpmd was too aggressive, since drivers > that have not been migrated are still using VXLAN-GPE. It's better to > keep it until the day the VXLAN-GPE RTE item is removed from DPDK. > Sorry, maybe I was not clear enough; I was asking for more details on the problem. With a net/vxlan commit [1] in this release (this commit is part of -rc1), the VXLAN & VXLAN-GPE headers were combined and a VXLAN-GBP header was added to this combined struct. The VXLAN-GPE header is marked as deprecated. Testpmd was also updated to use the new headers; that is the commit in the fixes tag of this patch. But drivers using the old, now deprecated, VXLAN structs won't be able to use testpmd for testing, so I agree, maybe I merged the patch too early. As this patch was part of -rc1, I wonder why we didn't get any complaint about not being able to test VXLAN-GPE? Btw, if we revert this patch, is there a way to test VXLAN-GBP, since it only exists as part of the new combined VXLAN struct? Instead of reverting the commit altogether, is there a way to keep the old capability in testpmd, but add the ability to test VXLAN-GBP? And another issue: there can still be some users of the VXLAN-GPE header in the net library; perhaps that was also deprecated prematurely. Can you please send a deprecation notice for combining the VXLAN headers and removing VXLAN-GPE in v24.11? Please CC all drivers implementing this flow pattern. This can be a way to highlight the issue to driver maintainers and communicate the change to end users. [1] 77cb7b18ad9b ("net: extend VXLAN header to support more extensions")
[PATCH v2] ethdev: fix device init without socket-local memory
When allocating memory for an ethdev, the rte_malloc_socket call used only allocates memory on the NUMA node/socket local to the device. This means that even if the user wanted to, they could never use a remote NIC without also having memory on that NIC's socket. For example, if we change examples/skeleton/basicfwd.c to have SOCKET_ID_ANY as the socket_id parameter for Rx and Tx rings, we should be able to run the app cross-numa e.g. as below, where the two PCI devices are on socket 1, and core 1 is on socket 0: ./build/examples/dpdk-skeleton -l 1 --legacy-mem --socket-mem=1024,0 \ -a a8:00.0 -a b8:00.0 This fails however, with the error: ETHDEV: failed to allocate private data PCI_BUS: Requested device :a8:00.0 cannot be used We can remove this restriction by doing a fallback call to general rte_malloc after a call to rte_malloc_socket fails. This should be safe to do because the later ethdev calls to setup Rx/Tx queues all take a socket_id parameter, which can be used by applications to enforce the requirement for local-only memory for a device, if so desired. [If device-local memory is present it will be used as before, while if not present the rte_eth_dev_configure call will now pass, but the subsequent queue setup calls requesting local memory will fail]. Fixes: e489007a411c ("ethdev: add generic create/destroy ethdev APIs") Fixes: dcd5c8112bc3 ("ethdev: add PCI driver helpers") Cc: sta...@dpdk.org Signed-off-by: Bruce Richardson Signed-off-by: Padraig Connolly --- V2: * Add warning printout in the case where we don't get device-local memory, but we do get memory on another socket. --- lib/ethdev/ethdev_driver.c | 20 +++- lib/ethdev/ethdev_pci.h| 20 +--- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/lib/ethdev/ethdev_driver.c b/lib/ethdev/ethdev_driver.c index f48c0eb8bc..c335a25a82 100644 --- a/lib/ethdev/ethdev_driver.c +++ b/lib/ethdev/ethdev_driver.c @@ -303,15 +303,25 @@ rte_eth_dev_create(struct rte_device *device, const char *name, return -ENODEV; if (priv_data_size) { + /* try alloc private data on device-local node. 
*/ ethdev->data->dev_private = rte_zmalloc_socket( name, priv_data_size, RTE_CACHE_LINE_SIZE, device->numa_node); - if (!ethdev->data->dev_private) { - RTE_ETHDEV_LOG_LINE(ERR, - "failed to allocate private data"); - retval = -ENOMEM; - goto probe_failed; + /* fall back to alloc on any socket on failure */ + if (ethdev->data->dev_private == NULL) { + ethdev->data->dev_private = rte_zmalloc(name, + priv_data_size, RTE_CACHE_LINE_SIZE); + + if (ethdev->data->dev_private == NULL) { + RTE_ETHDEV_LOG_LINE(ERR, "failed to allocate private data"); + retval = -ENOMEM; + goto probe_failed; + } + /* got memory, but not local, so issue warning */ + RTE_ETHDEV_LOG_LINE(WARNING, + "Private data for ethdev '%s' not allocated on local NUMA node %d", + device->name, device->numa_node); } } } else { diff --git a/lib/ethdev/ethdev_pci.h b/lib/ethdev/ethdev_pci.h index 737fff1833..ec4f731270 100644 --- a/lib/ethdev/ethdev_pci.h +++ b/lib/ethdev/ethdev_pci.h @@ -93,12 +93,26 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) return NULL; if (private_data_size) { + /* Try and alloc the private-data structure on socket local to the device */ eth_dev->data->dev_private = rte_zmalloc_socket(name, private_data_size, RTE_CACHE_LINE_SIZE, dev->device.numa_node); - if (!eth_dev->data->dev_private) { - rte_eth_dev_release_port(eth_dev); - return NULL; + + /* if cannot allocate memory on the socket local to the device +* use rte_malloc to allocate memory on some other socket, if available. +*/ + if (eth_dev->data->dev_private == NULL) { + eth_dev->data->dev_private = rte_zmalloc(name, + private_data_s
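As the commit message notes, an application that still requires device-local memory can enforce that where it actually matters, at queue setup time. A minimal sketch (the mbuf pool and queue parameters are assumptions):

    #include <rte_ethdev.h>

    /* Request queue memory explicitly on the device's own socket; this
     * fails, as before this patch, if that socket has no memory. */
    static int
    setup_local_rxq(uint16_t port_id, struct rte_mempool *mbuf_pool)
    {
        int dev_socket = rte_eth_dev_socket_id(port_id);

        return rte_eth_rx_queue_setup(port_id, 0 /* queue id */,
                1024 /* descriptors */, dev_socket,
                NULL /* default rx conf */, mbuf_pool);
    }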
Re: [PATCH] ethdev: fix device init without socket-local memory
On Sun, Jul 21, 2024 at 11:56:08PM +0100, Ferruh Yigit wrote: > On 7/19/2024 5:10 PM, Bruce Richardson wrote: > > On Fri, Jul 19, 2024 at 04:31:11PM +0100, Ferruh Yigit wrote: > >> On 7/19/2024 2:22 PM, Bruce Richardson wrote: > >>> On Fri, Jul 19, 2024 at 12:10:24PM +0100, Ferruh Yigit wrote: > One option can be adding a warning log to the fallback case, saying that > memory allocated from non-local socket and performance will be less. > Although this message may not mean much to a new user, it may still help > via a support engineer or internet search... > > >>> > >>> Yes, we could add a warning, but that is probably better in the app > >>> itself. > >>> Thinking about where we get issues, they primarily stem from running the > >>> cores on a different numa node Since the private_data struct is accessed > >>> by cores not devices, any perf degradation will come from having it remote > >>> to the cores. Because of that, I actually think the original > >>> implementation > >>> should really have used "rte_socket_id()" rather than the device socket id > >>> for the allocation. > >>> > >> > >> Yes I guess private_data is not accessed by device, but it may be > >> accessed by cores that is running the datapath. > >> > >> This API may be called by core that is not involved to the datapath, so > >> it may not correct to allocate memory from its numa. > >> > >> Will it be wrong to assume that cores used for datapath will be same > >> numa with device, if so allocating memory from that numa (which device > >> is in) makes more sense. Am I missing something? > >> > > > > It depends on which you think is more likely for the polling cores: > > - they will be on the same socket as the device, but different socket to > > the main lcore. > > - they will be on the same socket as the main lcore, but different socket > > to the device. > > > > Personally, I'd suspect both to be unlikely, but also both to be possible. > > For the first scenario, I don't see anything being broken or affected by > > the proposed fix here, since priority is still being given to memory on the > > same socket as the device. It just opens up the possibility of scenario > > two. > > > > My comment was on suggestion to use "rte_socket_id()" rather than the > device socket id, > if both nodes have memory, memory should be allocated from the one where > device is in, because although device doesn't use private_data, polling > cores will and polling cores will be most likely in the same node with > device and memory, but we don't know main core is in. > So I think first try for memory allocation should be node where device > is in, which is the existing code. > > If node that has device doesn't have any memory attached, your change > enables this case, as already there is memory only in one node, it > doesn't matter if we check device node or main core node anyway. > > > Briefly, I am OK to current patch with a warning log in fallback, but > not to "rte_socket_id()" change. > Ack, makes sense. Done in V2 patch. Thanks for review. /Bruce
Re: [PATCH v3 1/3] net/ice: fix possible memory leak
On Mon, Jul 22, 2024 at 08:28:34AM +, Vladimir Medvedkin wrote: > This patch fixes possible memory leak inside the > ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() > for previously allocated pkt_buf and msk_buf. > > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in > RSS") > Cc: sta...@dpdk.org > > Reported-by: Michael Theodore Stolarchuk > Signed-off-by: Vladimir Medvedkin > --- > drivers/net/ice/ice_hash.c | 21 + > 1 file changed, 13 insertions(+), 8 deletions(-) > > - if (ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt)) > - return -rte_errno; > + ret = ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt); > + if (ret) > + goto free_mem; > > - if (ice_parser_profile_init(&rslt, pkt_buf, msk_buf, > - pkt_len, ICE_BLK_RSS, true, &prof)) > - return -rte_errno; > + ret = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, > + pkt_len, ICE_BLK_RSS, true, &prof); > + goto free_mem; Are we not still missing an "if (ret != 0)" here? If so, I can just add on apply. /Bruce
Re: [PATCH v1 3/4] test/power: removed function pointer validations
On 20/07/2024 17:50, Sivaprasad Tummala wrote: After refactoring the power library, power management operations are now consistently supported regardless of the operating environment, making function pointer checks unnecessary and thus removed from applications. Signed-off-by: Sivaprasad Tummala --- app/test/test_power.c | 95 --- app/test/test_power_cpufreq.c | 52 --- app/test/test_power_kvm_vm.c | 36 - 3 files changed, 183 deletions(-) Hi Sivaprasad, Nice work on the patch-set. There's just four function pointer checks remaining that my compiler is complaining about. They are in examples/l3fwd-power/main.c (lines 443, 452, 1350, 1353). It would be nice to have these removed as well, seeing as the functions are now inlines and don't need these checks. I'm running the patch set through some tests here, will keep you posted on progress. Rgds, Dave. ---snip---
[PATCH] app/testpmd: fix build on signed comparison
Build error: .../app/test-pmd/config.c: In function 'icmp_echo_config_setup': .../app/test-pmd/config.c:5159:30: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores) ^ All 'nb_txq', 'nb_fwd_ports' & 'nb_fwd_lcores' are unsigned variables, but the warning is related to the integer promotion rules of C: 'nb_txq' -> uint16_t, promoted to 'int' 'nb_fwd_ports' -> uint16_t, promoted to 'int' (nb_txq * nb_fwd_ports) -> result 'int' nb_fwd_lcores -> 'uint32_t' Ends up comparing 'int' vs 'uint32_t'. Fix by adding back the cast that was initially part of the patch. Fixes: 2bf44dd14fa5 ("app/testpmd: fix lcore ID restriction") Cc: sta...@dpdk.org Reported-by: Raslan Darawsheh Signed-off-by: Ferruh Yigit --- Cc: sivaprasad.tumm...@amd.com --- app/test-pmd/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 66c3a68c1dc6..6f0beafa271c 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -5156,7 +5156,7 @@ icmp_echo_config_setup(void) lcoreid_t lc_id; uint16_t sm_id; - if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores) + if ((lcoreid_t)(nb_txq * nb_fwd_ports) < nb_fwd_lcores) cur_fwd_config.nb_fwd_lcores = (lcoreid_t) (nb_txq * nb_fwd_ports); else -- 2.43.0
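A standalone illustration of the promotion rule described in the commit message (the real fix casts to testpmd's lcoreid_t; uint32_t is used here to keep the example self-contained):

    #include <stdint.h>

    static int
    promotion_demo(uint16_t nb_txq, uint16_t nb_fwd_ports, uint32_t nb_fwd_lcores)
    {
        /* Both uint16_t operands are promoted to (signed) int, so the
         * product is an int; comparing it against a uint32_t mixes
         * signedness and triggers -Wsign-compare:
         *
         *     if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores)   // warns
         *
         * Casting the product to an unsigned type resolves it: */
        return (uint32_t)(nb_txq * nb_fwd_ports) < nb_fwd_lcores;
    }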
[PATCH v4 1/3] net/ice: fix possible memory leak
This patch fixes possible memory leak inside the ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() for previously allocated pkt_buf and msk_buf. Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Reported-by: Michael Theodore Stolarchuk Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index f923641533..6b3095e2c5 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -650,7 +650,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, uint8_t *pkt_buf, *msk_buf; uint8_t tmp_val = 0; uint8_t tmp_c = 0; - int i, j; + int i, j, ret = 0; if (ad->psr == NULL) return -rte_errno; @@ -670,8 +670,10 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, return -ENOMEM; msk_buf = rte_zmalloc(NULL, pkt_len, 0); - if (!msk_buf) + if (!msk_buf) { + rte_free(pkt_buf); return -ENOMEM; + } /* convert string to int array */ for (i = 0, j = 0; i < spec_len; i += 2, j++) { @@ -708,18 +710,22 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, msk_buf[j] = tmp_val * 16 + tmp_c - '0'; } - if (ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt)) - return -rte_errno; + ret = ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt); + if (ret) + goto free_mem; - if (ice_parser_profile_init(&rslt, pkt_buf, msk_buf, - pkt_len, ICE_BLK_RSS, true, &prof)) - return -rte_errno; + ret = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, + pkt_len, ICE_BLK_RSS, true, &prof); + if (ret) + goto free_mem; rte_memcpy(&meta->raw.prof, &prof, sizeof(prof)); +free_mem: rte_free(pkt_buf); rte_free(msk_buf); - return 0; + + return ret; } static void -- 2.34.1
[PATCH v4 2/3] net/ice: refactor raw pattern parsing function
Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index 6b3095e2c5..506ea261e8 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -658,9 +658,9 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, raw_spec = item->spec; raw_mask = item->mask; - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); - if (strlen((char *)(uintptr_t)raw_mask->pattern) != - spec_len) + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, raw_spec->length); + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length) != + spec_len) return -rte_errno; pkt_len = spec_len / 2; -- 2.34.1
[PATCH v4 3/3] net/ice: fix return value for raw pattern parsing function
If the parser was not initialized when calling ice_hash_parse_raw_pattern(), -rte_errno was returned. Replace returning rte_errno with ENOTSUP, since rte_errno is meaningless in the context of ice_hash_parse_raw_pattern(). Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index 506ea261e8..1188962752 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -653,7 +653,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, int i, j, ret = 0; if (ad->psr == NULL) - return -rte_errno; + return -ENOTSUP; raw_spec = item->spec; raw_mask = item->mask; -- 2.34.1
RE: [PATCH v3 1/3] net/ice: fix possible memory leak
-Original Message- From: Richardson, Bruce Sent: Monday, July 22, 2024 11:42 AM To: Medvedkin, Vladimir Cc: dev@dpdk.org; sta...@dpdk.org; Stolarchuk, Michael Subject: Re: [PATCH v3 1/3] net/ice: fix possible memory leak On Mon, Jul 22, 2024 at 08:28:34AM +, Vladimir Medvedkin wrote: > This patch fixes possible memory leak inside the > ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() > for previously allocated pkt_buf and msk_buf. > > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow > offloading in RSS") > Cc: sta...@dpdk.org > > Reported-by: Michael Theodore Stolarchuk > Signed-off-by: Vladimir Medvedkin > --- > drivers/net/ice/ice_hash.c | 21 + > 1 file changed, 13 insertions(+), 8 deletions(-) > > - if (ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt)) > - return -rte_errno; > + ret = ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt); > + if (ret) > + goto free_mem; > > - if (ice_parser_profile_init(&rslt, pkt_buf, msk_buf, > - pkt_len, ICE_BLK_RSS, true, &prof)) > - return -rte_errno; > + ret = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, > + pkt_len, ICE_BLK_RSS, true, &prof); > + goto free_mem; Are we not still missing an "if (ret != 0)" here? If so, I can just add on apply. That's correct, will send v4 /Bruce
[PATCH] net/gve: Update TX queue state
Fix a typo in updating the TX queue state: the TX queue start path was setting rx_queue_state instead of tx_queue_state. Signed-off-by: Tathagat Priyadarshi --- drivers/net/gve/gve_tx.c | 2 +- drivers/net/gve/gve_tx_dqo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/gve/gve_tx.c b/drivers/net/gve/gve_tx.c index 70d3ef0..500ae31 100644 --- a/drivers/net/gve/gve_tx.c +++ b/drivers/net/gve/gve_tx.c @@ -688,7 +688,7 @@ rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); - dev->data->rx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; return 0; } diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c index a65e6aa..1b85557 100644 --- a/drivers/net/gve/gve_tx_dqo.c +++ b/drivers/net/gve/gve_tx_dqo.c @@ -392,7 +392,7 @@ rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); - dev->data->rx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; return 0; } -- 1.8.3.1
Re: [RFC v2] ethdev: an API for cache stashing hints
On 7/15/2024 11:11 PM, Wathsala Vithanage wrote: > An application provides cache stashing hints to the ethernet devices to > improve memory access latencies from the CPU and the NIC. This patch > introduces three distinct hints for this purpose. > > The RTE_ETH_DEV_STASH_HINT_HOST_WILLNEED hint indicates that the host > (CPU) requires the data written by the NIC immediately. This implies > that the CPU expects to read data from its local cache rather than LLC > or main memory if possible. This would improve memory access latency in > the Rx path. For PCI devices with TPH capability, these hints translate > into DWHR (Device Writes Host Reads) access pattern. This hint is only > valid for receive queues. > > The RTE_ETH_DEV_STASH_HINT_BI_DIR_DATA hint indicates that the host and > the device access the data structure equally. Rx/Tx queue descriptors > fit the description of such data. This hint applies to both Rx and Tx > directions. In the PCI TPH context, this hint translates into a > Bi-Directional access pattern. > > RTE_ETH_DEV_STASH_HINT_DEV_ONLY hint indicates that the CPU is not > involved in a given device's receive or transmit paths. This implies > that only devices are involved in the IO path. Depending on the > implementation, this hint may result in data getting placed in a cache > close to the device or not cached at all. For PCI devices with TPH > capability, this hint translates into D*D* (DWDR, DRDW, DWDW, DRDR) > access patterns. This is a bidirectional hint, and it can be applied to > both Rx and Tx queues. > > The RTE_ETH_DEV_STASH_HINT_HOST_DONTNEED hint indicates that the device > reads data written by the host (CPU) that may still be in the host's > local cache but is not required by the host anytime soon. This hint is > intended to prevent unnecessary cache invalidations that cause > interconnect latencies when a device writes to a buffer already in host > cache memory. In DPDK, this could happen with the recycling of mbufs > where a mbuf is placed in the Tx queue that then gets back into mempool > and gets recycled back into the Rx queue, all while a copy is being held > in the CPU's local cache unnecessarily. By using this hint on supported > platforms, the mbuf will be invalidated after the device completes the > buffer reading, but it will be well before the buffer gets recycled and > updated in the Rx path. This hint is only valid for transmit queues. > > Applications use three main interfaces in the ethdev library to discover > and set cache stashing hints. rte_eth_dev_stashing_hints_tx interface is > used to set hints on a Tx queue. rte_eth_dev_stashing_hints_rx interface > is used to set hints on an Rx queue. Both of these functions take the > following parameters as inputs: a port_id (the id of the ethernet > device), a cpu_id (the target CPU), a cache_level (the level of the > cache hierarchy the data should be stashed into), a queue_id (the queue > the hints are applied to). In addition to the above list of parameters, > a type parameter indicates the type of the object the application > expects to be stashed by the hardware. Depending on the hardware, these > may vary. Intel E810 NICs support the stashing of Rx/Tx descriptors, > packet headers, and packet payloads. These are indicated by the macros > RTE_ETH_DEV_STASH_TYPE_DESC, RTE_ETH_DEV_STASH_TYPE_HEADER, > RTE_ETH_DEV_STASH_TYPE_PAYLOAD. Hardware capable of stashing data at any > given offset into a packet can use the RTE_ETH_DEV_STASH_TYPE_OFFSET > type. 
When an offset is used, the offset parameter in the above two > functions should be set appropriately. > > rte_eth_dev_stashing_hints_discover is used to discover the object types > and hints supported in the platform and the device. The function takes > types and hints pointers used as a bit vector to indicate hints and > types supported by the NIC. An application that intends to use stashing > hints should first discover supported hints and types and then use the > functions rte_eth_dev_stashing_hints_tx and > rte_eth_dev_stashing_hints_rx as required to set stashing hints > accordingly. eth_dev_ops structure has been updated with two new ops > that a PMD should implement to support cache stashing hints. A PMD that > intends to support cache stashing hints should initialize the > set_stashing_hints function pointer to a function that issues hints to > the underlying hardware in compliance with platform capabilities. The > same PMD should also implement a function that can return two bit fields > indicating supported types and hints and then initialize the > discover_stashing_hints function pointer with it. If the NIC supports > cache stashing hints, the NIC should always set the > RTE_ETH_DEV_CAPA_CACHE_STASHING device capability. > > Signed-off-by: Wathsala Vithanage > Reviewed-by: Dhruv Tripathi > This is a fine-grained config for performance improvement, it may help to see the performance impact and driver implementation complexity, before deciding.
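A usage sketch of the proposed flow (the RFC text above names the functions and parameters but does not pin down exact prototypes, so the signatures here are assumptions):

	#include <rte_ethdev.h>

	/* Hypothetical: enable header stashing for Rx queue 0 into the L2
	 * cache of lcore 3, per the RFC's discover-then-set flow. */
	static void try_enable_rx_stashing(uint16_t port_id)
	{
		struct rte_eth_dev_info info;
		uint16_t types = 0, hints = 0;

		rte_eth_dev_info_get(port_id, &info);
		if (!(info.dev_capa & RTE_ETH_DEV_CAPA_CACHE_STASHING))
			return; /* capability flag proposed by the RFC */

		rte_eth_dev_stashing_hints_discover(port_id, &types, &hints);

		if ((types & RTE_ETH_DEV_STASH_TYPE_HEADER) &&
		    (hints & RTE_ETH_DEV_STASH_HINT_HOST_WILLNEED))
			rte_eth_dev_stashing_hints_rx(port_id, /*cpu_id=*/3,
					/*cache_level=*/2, /*queue_id=*/0,
					RTE_ETH_DEV_STASH_TYPE_HEADER,
					RTE_ETH_DEV_STASH_HINT_HOST_WILLNEED,
					/*offset=*/0);
	}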
Re: [PATCH v4 2/3] net/ice: refactor raw pattern parsing function
On Mon, Jul 22, 2024 at 10:59:49AM +, Vladimir Medvedkin wrote: > Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. > > Signed-off-by: Vladimir Medvedkin > --- > drivers/net/ice/ice_hash.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c > index 6b3095e2c5..506ea261e8 100644 > --- a/drivers/net/ice/ice_hash.c > +++ b/drivers/net/ice/ice_hash.c > @@ -658,9 +658,9 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, > raw_spec = item->spec; > raw_mask = item->mask; > > - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); > - if (strlen((char *)(uintptr_t)raw_mask->pattern) != > - spec_len) > + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, > raw_spec->length); > + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length) != > + spec_len) Are we missing something by not checking the return values from the length calls for overflow? If spec_len == raw_spec->length, then we have an overflow, and if raw_mask similarly overflows the comparison would still pass and not flag an error. /Bruce > return -rte_errno; > > pkt_len = spec_len / 2; > -- > 2.34.1 >
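One shape the suggested overflow check could take (illustrative only, following the variables in the hunk above; the actual v5 may differ):

	spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern,
			raw_spec->length);
	/* No NUL within the buffer: the pattern overflows it. */
	if (spec_len == raw_spec->length)
		return -EINVAL;
	if (strnlen((char *)(uintptr_t)raw_mask->pattern,
			raw_spec->length) != spec_len)
		return -EINVAL;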
Re: [PATCH] net/ice: fix DCF init for E830 devices
On Fri, Jul 19, 2024 at 02:44:27PM +0100, Ian Stokes wrote: > From: Bruce Richardson > > E830 introduces a new version of Get Link Status Data > which increases the size of struct ice_aqc_get_link_status_data > from 32 bytes to 56 bytes. When initializing DCF, attempt to get > link status data using both formats of the command: if the request > with the default setting fails, override the MAC type to E830 and retry. > > Signed-off-by: Bruce Richardson > Signed-off-by: Ian Stokes > --- > drivers/net/ice/ice_dcf_parent.c | 15 +-- > 1 file changed, 13 insertions(+), 2 deletions(-) > Patch applied to dpdk-next-net-intel /Bruce
[v2 04/30] dma/dpaa2: multiple process support
From: Jun Yang Support multiple processes for dpaa2 dma. 1) Move queue configuration procedure from init function to device configuration function which is called by user. 2) Instances of dpaa2_dpdmai_dev and qdma_device are allocated from primary process and shared between multiple processes. 3) MC reg is per process mapped. 4) User is responsible to check vq number configured before using dma device to identify if this device is occupied by other process. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 405 - drivers/dma/dpaa2/dpaa2_qdma.h | 6 +- 2 files changed, 254 insertions(+), 157 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 15d3776603..44b82c139e 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2018-2022 NXP + * Copyright 2018-2023 NXP */ #include @@ -19,6 +19,8 @@ static uint32_t dpaa2_coherent_no_alloc_cache; static uint32_t dpaa2_coherent_alloc_cache; +static struct fsl_mc_io s_proc_mc_reg; + static inline int qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring, const uint16_t *elem, uint16_t nb, @@ -960,6 +962,9 @@ dpaa2_qdma_info_get(const struct rte_dma_dev *dev, dev_info->max_vchans = dpdmai_dev->num_queues; dev_info->max_desc = DPAA2_QDMA_MAX_DESC; dev_info->min_desc = DPAA2_QDMA_MIN_DESC; + dev_info->dev_name = dev->device->name; + if (dpdmai_dev->qdma_dev) + dev_info->nb_vchans = dpdmai_dev->qdma_dev->num_vqs; return 0; } @@ -969,25 +974,102 @@ dpaa2_qdma_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *dev_conf, uint32_t conf_sz) { - char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */ struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private; struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev; uint16_t i; + struct dpdmai_rx_queue_cfg rx_queue_cfg; + struct dpdmai_rx_queue_attr rx_attr; + struct dpdmai_tx_queue_attr tx_attr; + struct dpaa2_queue *rxq; + int ret = 0; DPAA2_QDMA_FUNC_TRACE(); RTE_SET_USED(conf_sz); - /* In case QDMA device is not in stopped state, return -EBUSY */ - if (qdma_dev->state == 1) { - DPAA2_QDMA_ERR("%s Not stopped, configure failed.", - dev->data->dev_name); - return -EBUSY; + if (dev_conf->nb_vchans > dpdmai_dev->num_queues) { + DPAA2_QDMA_ERR("%s config queues(%d) > hw queues(%d)", + dev->data->dev_name, dev_conf->nb_vchans, + dpdmai_dev->num_queues); + + return -ENOTSUP; + } + + if (qdma_dev->vqs) { + DPAA2_QDMA_DEBUG("%s: queues de-config(%d)/re-config(%d)", + dev->data->dev_name, + qdma_dev->num_vqs, dev_conf->nb_vchans); + for (i = 0; i < qdma_dev->num_vqs; i++) { + if ((qdma_dev->vqs[i].num_enqueues != + qdma_dev->vqs[i].num_dequeues) && + !qdma_dev->is_silent) { + DPAA2_QDMA_ERR("VQ(%d) %"PRIu64" jobs in dma.", + i, qdma_dev->vqs[i].num_enqueues - + qdma_dev->vqs[i].num_dequeues); + return -EBUSY; + } + } + for (i = 0; i < qdma_dev->num_vqs; i++) { + if (qdma_dev->vqs[i].fle_pool) { + rte_mempool_free(qdma_dev->vqs[i].fle_pool); + qdma_dev->vqs[i].fle_pool = NULL; + } + if (qdma_dev->vqs[i].ring_cntx_idx) { + rte_free(qdma_dev->vqs[i].ring_cntx_idx); + qdma_dev->vqs[i].ring_cntx_idx = NULL; + } + rxq = &dpdmai_dev->rx_queue[i]; + if (rxq->q_storage) { + DPAA2_QDMA_DEBUG("%s rxq[%d] re-configure", + dev->data->dev_name, i); + dpaa2_free_dq_storage(rxq->q_storage); + rte_free(rxq->q_storage); + rxq->q_storage = NULL; + } + } + rte_free(qdma_dev->vqs); + qdma_dev->vqs = NULL; + qdma_dev->num_vqs = 0; + } + + /* Set up Rx Queues */ + for (i = 0; i < 
dev_conf->nb_vchans; i++) { + memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg)); + rxq = &dpdmai_dev->rx_queue[i]; + ret = dpdmai_set_rx_queue(&s_proc_mc_reg, +
[v2 03/30] dma/dpaa2: adapt DMA driver API
From: Jun Yang 1) Support DMA single copy and SG copy. 2) Silent mode support. Add index combined with length field. For Silent mode, this index is used to notify DMA driver which inner descriptor should be used. For none silent mode, this index is used to notify user which descriptor is completed. In addition, because dpaa2 qdma is not able to preserve order, "rte_dma_completed_t" returns multiple indexes instead of last index. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 1667 +++- drivers/dma/dpaa2/dpaa2_qdma.h | 126 +- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h | 119 +- drivers/dma/dpaa2/version.map | 13 - 4 files changed, 799 insertions(+), 1126 deletions(-) delete mode 100644 drivers/dma/dpaa2/version.map diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 945ba71e4a..15d3776603 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -16,218 +16,345 @@ #define DPAA2_QDMA_PREFETCH "prefetch" -uint32_t dpaa2_coherent_no_alloc_cache; -uint32_t dpaa2_coherent_alloc_cache; +static uint32_t dpaa2_coherent_no_alloc_cache; +static uint32_t dpaa2_coherent_alloc_cache; static inline int -qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest, -uint32_t len, struct qbman_fd *fd, -struct dpaa2_qdma_rbp *rbp, int ser) +qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring, + const uint16_t *elem, uint16_t nb, + uint16_t *free_space) { - fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src)); - fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src)); + if (unlikely(nb > ring->free_space)) + return 0; - fd->simple_pci.len_sl = len; + if ((ring->tail + nb) < DPAA2_QDMA_MAX_DESC) { + rte_memcpy(&ring->cntx_idx_ring[ring->tail], + elem, nb * sizeof(uint16_t)); + ring->tail += nb; + } else { + rte_memcpy(&ring->cntx_idx_ring[ring->tail], + elem, + (DPAA2_QDMA_MAX_DESC - ring->tail) * + sizeof(uint16_t)); + rte_memcpy(&ring->cntx_idx_ring[0], + &elem[DPAA2_QDMA_MAX_DESC - ring->tail], + (nb - DPAA2_QDMA_MAX_DESC + ring->tail) * + sizeof(uint16_t)); + ring->tail = (ring->tail + nb) & (DPAA2_QDMA_MAX_DESC - 1); + } + ring->free_space -= nb; + ring->nb_in_ring += nb; - fd->simple_pci.bmt = 1; - fd->simple_pci.fmt = 3; - fd->simple_pci.sl = 1; - fd->simple_pci.ser = ser; + if (free_space) + *free_space = ring->free_space; - fd->simple_pci.sportid = rbp->sportid; /*pcie 3 */ - fd->simple_pci.srbp = rbp->srbp; - if (rbp->srbp) - fd->simple_pci.rdttype = 0; - else - fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache; + return nb; +} - /*dest is pcie memory */ - fd->simple_pci.dportid = rbp->dportid; /*pcie 3 */ - fd->simple_pci.drbp = rbp->drbp; - if (rbp->drbp) - fd->simple_pci.wrttype = 0; - else - fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache; +static inline int +qdma_cntx_idx_ring_dq(struct qdma_cntx_idx_ring *ring, + uint16_t *elem, uint16_t max) +{ + int ret = ring->nb_in_ring > max ? 
max : ring->nb_in_ring; - fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest)); - fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest)); + if (!ret) + return 0; - return 0; + if ((ring->start + ret) < DPAA2_QDMA_MAX_DESC) { + rte_memcpy(elem, + &ring->cntx_idx_ring[ring->start], + ret * sizeof(uint16_t)); + ring->start += ret; + } else { + rte_memcpy(elem, + &ring->cntx_idx_ring[ring->start], + (DPAA2_QDMA_MAX_DESC - ring->start) * + sizeof(uint16_t)); + rte_memcpy(&elem[DPAA2_QDMA_MAX_DESC - ring->start], + &ring->cntx_idx_ring[0], + (ret - DPAA2_QDMA_MAX_DESC + ring->start) * + sizeof(uint16_t)); + ring->start = (ring->start + ret) & (DPAA2_QDMA_MAX_DESC - 1); + } + ring->free_space += ret; + ring->nb_in_ring -= ret; + + return ret; } -static inline int -qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest, -uint32_t len, struct qbman_fd *fd, int ser) +static int +dpaa2_qdma_multi_eq(struct qdma_virt_queue *qdma_vq) { - fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src)); - fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src)); - - fd->simple_ddr.len = len; - - fd->simple_ddr.bmt = 1
[v2 05/30] dma/dpaa2: add sanity check for SG entry
From: Jun Yang Make sure the SG entry number doesn't overflow. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 44b82c139e..7f6ebcb46b 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -615,8 +615,17 @@ dpaa2_qdma_copy_sg(void *dev_private, struct qbman_fle *fle; struct qdma_sdd *sdd; - if (unlikely(nb_src != nb_dst)) + if (unlikely(nb_src != nb_dst)) { + DPAA2_QDMA_ERR("SG entry src num(%d) != dst num(%d)", + nb_src, nb_dst); return -ENOTSUP; + } + + if (unlikely(nb_src > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)) { + DPAA2_QDMA_ERR("SG entry number(%d) > MAX(%d)", + nb_src, RTE_DPAA2_QDMA_JOB_SUBMIT_MAX); + return -EINVAL; + } memset(fd, 0, sizeof(struct qbman_fd)); -- 2.25.1
[v2 08/30] bus/fslmc: enhance the qbman dq storage logic
From: Jun Yang Multiple DQ storages are used among multiple cores, the single dq storage of first union is leak if multiple storages are allocated. It does not make sense to keep the single dq storage of union, remove it and reuse the first dq storage of multiple storages for this case. Signed-off-by: Jun Yang --- drivers/bus/fslmc/portal/dpaa2_hw_dpci.c| 25 ++- drivers/bus/fslmc/portal/dpaa2_hw_dpio.c| 7 +- drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 38 +- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 23 ++ drivers/crypto/dpaa2_sec/dpaa2_sec_raw_dp.c | 4 +- drivers/dma/dpaa2/dpaa2_qdma.c | 43 ++- drivers/net/dpaa2/dpaa2_ethdev.c| 81 - drivers/net/dpaa2/dpaa2_rxtx.c | 19 +++-- drivers/raw/dpaa2_cmdif/dpaa2_cmdif.c | 4 +- 9 files changed, 103 insertions(+), 141 deletions(-) diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c index 07256ed7ec..160126f6d6 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c +++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c @@ -81,22 +81,10 @@ rte_dpaa2_create_dpci_device(int vdev_fd __rte_unused, } /* Allocate DQ storage for the DPCI Rx queues */ - rxq = &(dpci_node->rx_queue[i]); - rxq->q_storage = rte_malloc("dq_storage", - sizeof(struct queue_storage_info_t), - RTE_CACHE_LINE_SIZE); - if (!rxq->q_storage) { - DPAA2_BUS_ERR("q_storage allocation failed\n"); - ret = -ENOMEM; + rxq = &dpci_node->rx_queue[i]; + ret = dpaa2_queue_storage_alloc(rxq, 1); + if (ret) goto err; - } - - memset(rxq->q_storage, 0, sizeof(struct queue_storage_info_t)); - ret = dpaa2_alloc_dq_storage(rxq->q_storage); - if (ret) { - DPAA2_BUS_ERR("dpaa2_alloc_dq_storage failed\n"); - goto err; - } } /* Enable the device */ @@ -141,12 +129,9 @@ rte_dpaa2_create_dpci_device(int vdev_fd __rte_unused, err: for (i = 0; i < DPAA2_DPCI_MAX_QUEUES; i++) { - struct dpaa2_queue *rxq = &(dpci_node->rx_queue[i]); + struct dpaa2_queue *rxq = &dpci_node->rx_queue[i]; - if (rxq->q_storage) { - dpaa2_free_dq_storage(rxq->q_storage); - rte_free(rxq->q_storage); - } + dpaa2_queue_storage_free(rxq, 1); } rte_free(dpci_node); diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c index 4aec7b2cd8..a8afc772fd 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c +++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c @@ -574,6 +574,7 @@ dpaa2_free_dq_storage(struct queue_storage_info_t *q_storage) for (i = 0; i < NUM_DQS_PER_QUEUE; i++) { rte_free(q_storage->dq_storage[i]); + q_storage->dq_storage[i] = NULL; } } @@ -583,7 +584,7 @@ dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage) int i = 0; for (i = 0; i < NUM_DQS_PER_QUEUE; i++) { - q_storage->dq_storage[i] = rte_malloc(NULL, + q_storage->dq_storage[i] = rte_zmalloc(NULL, dpaa2_dqrr_size * sizeof(struct qbman_result), RTE_CACHE_LINE_SIZE); if (!q_storage->dq_storage[i]) @@ -591,8 +592,10 @@ dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage) } return 0; fail: - while (--i >= 0) + while (--i >= 0) { rte_free(q_storage->dq_storage[i]); + q_storage->dq_storage[i] = NULL; + } return -1; } diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h index 169c7917ea..1ce481c88d 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h +++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. 
- * Copyright 2016-2021 NXP + * Copyright 2016-2024 NXP * */ @@ -165,7 +165,9 @@ struct __rte_cache_aligned dpaa2_queue { uint64_t tx_pkts; uint64_t err_pkts; union { - struct queue_storage_info_t *q_storage; + /**Ingress*/ + struct queue_storage_info_t *q_storage[RTE_MAX_LCORE]; + /**Egress*/ struct qbman_result *cscn; }; struct rte_event ev; @@ -186,6 +188,38 @@ struct swp_active_dqs { uint64_t reserved[7]; }; +#define dpaa2_queue_storage_alloc(q, num) \ +({ \ + int ret = 0, i; \ + \ + for (i = 0; i < (num); i+
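The macro body is truncated above; from the dpci hunk earlier in this patch, its per-index allocation plausibly reduces to the following (a reconstruction, not the verbatim macro):

	/* Sketch of what dpaa2_queue_storage_alloc(q, num) performs for
	 * each index: allocate the per-lcore storage struct, then its DQ
	 * storages, mirroring the code removed from dpaa2_hw_dpci.c. */
	static int queue_storage_alloc_sketch(struct dpaa2_queue *q, int num)
	{
		int i, ret;

		for (i = 0; i < num; i++) {
			q->q_storage[i] = rte_zmalloc(NULL,
					sizeof(struct queue_storage_info_t),
					RTE_CACHE_LINE_SIZE);
			if (q->q_storage[i] == NULL)
				return -ENOMEM;
			ret = dpaa2_alloc_dq_storage(q->q_storage[i]);
			if (ret != 0)
				return ret;
		}
		return 0;
	}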
[v2 09/30] dma/dpaa2: add short FD support
From: Jun Yang Short FD can be used for single transfer scenario which shows higher performance than FLE. 1) Save index context in FD att field for short and FLE(NonSG). 2) Identify FD type by att of FD. 3) Force 48 bits address for source address and fle according to spec. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 314 +++-- drivers/dma/dpaa2/dpaa2_qdma.h | 69 -- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h | 13 - 3 files changed, 285 insertions(+), 111 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 53caccecd7..d1358b686c 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -522,7 +522,6 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle) const struct qdma_cntx_fle_sdd *fle_sdd; const struct qdma_sdd *sdd; const struct qdma_cntx_sg *cntx_sg = NULL; - const struct qdma_cntx_long *cntx_long = NULL; fle_sdd = container_of(fle, const struct qdma_cntx_fle_sdd, fle[0]); sdd = fle_sdd->sdd; @@ -545,11 +544,8 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle) QBMAN_FLE_WORD4_FMT_SGE) { cntx_sg = container_of(fle_sdd, const struct qdma_cntx_sg, fle_sdd); - } else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt == + } else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt != QBMAN_FLE_WORD4_FMT_SBF) { - cntx_long = container_of(fle_sdd, const struct qdma_cntx_long, - fle_sdd); - } else { DPAA2_QDMA_ERR("Unsupported fle format:%d", fle[DPAA2_QDMA_SRC_FLE].word4.fmt); return; @@ -560,11 +556,6 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle) dpaa2_qdma_sdd_dump(&sdd[i]); } - if (cntx_long) { - DPAA2_QDMA_INFO("long format/Single buffer cntx idx:%d", - cntx_long->cntx_idx); - } - if (cntx_sg) { DPAA2_QDMA_INFO("long format/SG format, job number:%d", cntx_sg->job_nb); @@ -582,6 +573,8 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle) DPAA2_QDMA_INFO("cntx_idx[%d]:%d", i, cntx_sg->cntx_idx[i]); } + } else { + DPAA2_QDMA_INFO("long format/Single buffer cntx"); } } @@ -644,7 +637,7 @@ dpaa2_qdma_copy_sg(void *dev_private, offsetof(struct qdma_cntx_sg, fle_sdd) + offsetof(struct qdma_cntx_fle_sdd, fle); - DPAA2_SET_FD_ADDR(fd, fle_iova); + dpaa2_qdma_fd_set_addr(fd, fle_iova); DPAA2_SET_FD_COMPOUND_FMT(fd); DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_sg); @@ -680,6 +673,7 @@ dpaa2_qdma_copy_sg(void *dev_private, if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG)) dpaa2_qdma_long_fmt_dump(cntx_sg->fle_sdd.fle); + dpaa2_qdma_fd_save_att(fd, 0, DPAA2_QDMA_FD_SG); qdma_vq->fd_idx++; qdma_vq->silent_idx = (qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1); @@ -696,74 +690,178 @@ dpaa2_qdma_copy_sg(void *dev_private, return ret; } +static inline void +qdma_populate_fd_pci(uint64_t src, uint64_t dest, + uint32_t len, struct qbman_fd *fd, + struct dpaa2_qdma_rbp *rbp, int ser) +{ + fd->simple_pci.saddr_lo = lower_32_bits(src); + fd->simple_pci.saddr_hi = upper_32_bits(src); + + fd->simple_pci.len_sl = len; + + fd->simple_pci.bmt = DPAA2_QDMA_BMT_DISABLE; + fd->simple_pci.fmt = DPAA2_QDMA_FD_SHORT_FORMAT; + fd->simple_pci.sl = 1; + fd->simple_pci.ser = ser; + if (ser) + fd->simple.frc |= QDMA_SER_CTX; + + fd->simple_pci.sportid = rbp->sportid; + + fd->simple_pci.svfid = rbp->svfid; + fd->simple_pci.spfid = rbp->spfid; + fd->simple_pci.svfa = rbp->svfa; + fd->simple_pci.dvfid = rbp->dvfid; + fd->simple_pci.dpfid = rbp->dpfid; + fd->simple_pci.dvfa = rbp->dvfa; + + fd->simple_pci.srbp = rbp->srbp; + if (rbp->srbp) + fd->simple_pci.rdttype = 0; + else + fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache; + + /*dest is 
pcie memory */ + fd->simple_pci.dportid = rbp->dportid; + fd->simple_pci.drbp = rbp->drbp; + if (rbp->drbp) + fd->simple_pci.wrttype = 0; + else + fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache; + + fd->simple_pci.daddr_lo = lower_32_bits(dest); + fd->simple_pci.daddr_hi = upper_32_bits(dest); +} + +static inline void +qdma_populate_fd_ddr(uint64_t src, uint64_t dest, + uint32_t len, struct qbman_fd *fd, int ser) +{ + fd->simple_ddr.saddr_lo = lower_32_bits(src); + fd->simple_ddr.saddr_hi = upper_32_bits(src); + + fd->simple_ddr.len
[v2 10/30] dma/dpaa2: limit the max descriptor number
From: Jun Yang For the non-SG format, the index is saved in the FD in a field DPAA2_QDMA_FD_ATT_TYPE_OFFSET (13) bits wide. The ring's maximum descriptor number must be a power of two, so the effective maximum is ((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) / 2). Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.h | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h index 0be65e1cc6..250c83c83c 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.h +++ b/drivers/dma/dpaa2/dpaa2_qdma.h @@ -8,8 +8,6 @@ #include "portal/dpaa2_hw_pvt.h" #include "portal/dpaa2_hw_dpio.h" -#define DPAA2_QDMA_MAX_DESC 4096 -#define DPAA2_QDMA_MIN_DESC 1 #define DPAA2_QDMA_MAX_VHANS 64 #define DPAA2_DPDMAI_MAX_QUEUES 16 @@ -169,10 +167,15 @@ enum dpaa2_qdma_fd_type { }; #define DPAA2_QDMA_FD_ATT_TYPE_OFFSET 13 +#define DPAA2_QDMA_FD_ATT_MAX_IDX \ + ((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1) #define DPAA2_QDMA_FD_ATT_TYPE(att) \ (att >> DPAA2_QDMA_FD_ATT_TYPE_OFFSET) #define DPAA2_QDMA_FD_ATT_CNTX(att) \ - (att & ((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1)) + (att & DPAA2_QDMA_FD_ATT_MAX_IDX) + +#define DPAA2_QDMA_MAX_DESC ((DPAA2_QDMA_FD_ATT_MAX_IDX + 1) / 2) +#define DPAA2_QDMA_MIN_DESC 1 static inline void dpaa2_qdma_fd_set_addr(struct qbman_fd *fd, @@ -186,6 +189,7 @@ static inline void dpaa2_qdma_fd_save_att(struct qbman_fd *fd, uint16_t job_idx, enum dpaa2_qdma_fd_type type) { + RTE_ASSERT(job_idx <= DPAA2_QDMA_FD_ATT_MAX_IDX); fd->simple_ddr.rsv1_att = job_idx | (type << DPAA2_QDMA_FD_ATT_TYPE_OFFSET); } -- 2.25.1
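Concretely, the arithmetic works out as follows (a standalone check derived from the macros above, not part of the patch):

	#define DPAA2_QDMA_FD_ATT_TYPE_OFFSET 13
	/* 13 index bits => values 0..8191 */
	#define DPAA2_QDMA_FD_ATT_MAX_IDX \
		((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1)
	/* ring capped at half the index space => 4096 descriptors */
	#define DPAA2_QDMA_MAX_DESC ((DPAA2_QDMA_FD_ATT_MAX_IDX + 1) / 2)

	_Static_assert(DPAA2_QDMA_FD_ATT_MAX_IDX == 8191, "index field");
	_Static_assert(DPAA2_QDMA_MAX_DESC == 4096, "max ring size");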
[v2 11/30] dma/dpaa2: change the DMA copy return value
From: Jun Yang The return value of a DMA copy/SG copy should be the index of the descriptor copied on success. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index d1358b686c..b70750fede 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -605,6 +605,11 @@ dpaa2_qdma_copy_sg(void *dev_private, return -ENOTSUP; } + if (unlikely(!nb_src)) { + DPAA2_QDMA_ERR("No SG entry specified"); + return -EINVAL; + } + if (unlikely(nb_src > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)) { DPAA2_QDMA_ERR("SG entry number(%d) > MAX(%d)", nb_src, RTE_DPAA2_QDMA_JOB_SUBMIT_MAX); @@ -681,10 +686,13 @@ dpaa2_qdma_copy_sg(void *dev_private, if (flags & RTE_DMA_OP_FLAG_SUBMIT) { expected = qdma_vq->fd_idx; ret = dpaa2_qdma_multi_eq(qdma_vq); - if (likely(ret == expected)) - return 0; + if (likely(ret == expected)) { + qdma_vq->copy_num += nb_src; + return (qdma_vq->copy_num - 1) & UINT16_MAX; + } } else { - return 0; + qdma_vq->copy_num += nb_src; + return (qdma_vq->copy_num - 1) & UINT16_MAX; } return ret; -- 2.25.1
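For context, this matches the generic dmadev contract: enqueue ops return a 16-bit ring index and rte_dma_completed() reports the index of the last finished op. A minimal caller sketch using the public API (device/vchan setup omitted; not dpaa2-specific):

	#include <errno.h>
	#include <stdbool.h>
	#include <rte_dmadev.h>
	#include <rte_pause.h>

	static int copy_and_wait(int16_t dev_id, uint16_t vchan,
			rte_iova_t src, rte_iova_t dst, uint32_t len)
	{
		bool has_error = false;
		uint16_t last_idx = 0;
		int idx;

		idx = rte_dma_copy(dev_id, vchan, src, dst, len,
				RTE_DMA_OP_FLAG_SUBMIT);
		if (idx < 0)
			return idx; /* ring full or bad arguments */

		/* Poll until the op completes; last_idx wraps at 16 bits,
		 * exactly like the value returned by the enqueue above. */
		while (rte_dma_completed(dev_id, vchan, 1, &last_idx,
				&has_error) == 0)
			rte_pause();

		return has_error ? -EIO : 0;
	}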
[v2 12/30] dma/dpaa2: move the qdma header to common place
From: Jun Yang Include rte_pmd_dpaax_qdma.h instead of rte_pmd_dpaa2_qdma.h and change code accordingly. Signed-off-by: Jun Yang --- doc/api/doxy-api-index.md | 2 +- doc/api/doxy-api.conf.in | 2 +- drivers/common/dpaax/meson.build | 3 +- drivers/common/dpaax/rte_pmd_dpaax_qdma.h | 23 +++ drivers/dma/dpaa2/dpaa2_qdma.c| 84 +++ drivers/dma/dpaa2/dpaa2_qdma.h| 10 +-- drivers/dma/dpaa2/meson.build | 4 +- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h| 23 --- 8 files changed, 72 insertions(+), 79 deletions(-) create mode 100644 drivers/common/dpaax/rte_pmd_dpaax_qdma.h delete mode 100644 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index f9283154f8..ab42440733 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -57,7 +57,7 @@ The public API headers are grouped by topics: [mlx5](@ref rte_pmd_mlx5.h), [dpaa2_mempool](@ref rte_dpaa2_mempool.h), [dpaa2_cmdif](@ref rte_pmd_dpaa2_cmdif.h), - [dpaa2_qdma](@ref rte_pmd_dpaa2_qdma.h), + [dpaax](@ref rte_pmd_dpaax_qdma.h), [crypto_scheduler](@ref rte_cryptodev_scheduler.h), [dlb2](@ref rte_pmd_dlb2.h), [ifpga](@ref rte_pmd_ifpga.h) diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in index a8823c046f..33250d867c 100644 --- a/doc/api/doxy-api.conf.in +++ b/doc/api/doxy-api.conf.in @@ -8,7 +8,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \ @TOPDIR@/drivers/bus/vdev \ @TOPDIR@/drivers/crypto/cnxk \ @TOPDIR@/drivers/crypto/scheduler \ - @TOPDIR@/drivers/dma/dpaa2 \ + @TOPDIR@/drivers/common/dpaax \ @TOPDIR@/drivers/event/dlb2 \ @TOPDIR@/drivers/event/cnxk \ @TOPDIR@/drivers/mempool/cnxk \ diff --git a/drivers/common/dpaax/meson.build b/drivers/common/dpaax/meson.build index a162779116..db61b76ce3 100644 --- a/drivers/common/dpaax/meson.build +++ b/drivers/common/dpaax/meson.build @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2018 NXP +# Copyright 2018, 2024 NXP if not is_linux build = false @@ -16,3 +16,4 @@ endif if cc.has_argument('-Wno-pointer-arith') cflags += '-Wno-pointer-arith' endif +headers = files('rte_pmd_dpaax_qdma.h') diff --git a/drivers/common/dpaax/rte_pmd_dpaax_qdma.h b/drivers/common/dpaax/rte_pmd_dpaax_qdma.h new file mode 100644 index 00..2552a4adfb --- /dev/null +++ b/drivers/common/dpaax/rte_pmd_dpaax_qdma.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2021-2024 NXP + */ + +#ifndef _RTE_PMD_DPAAX_QDMA_H_ +#define _RTE_PMD_DPAAX_QDMA_H_ + +#include + +#define RTE_DPAAX_QDMA_COPY_IDX_OFFSET 8 +#define RTE_DPAAX_QDMA_SG_IDX_ADDR_ALIGN \ + RTE_BIT64(RTE_DPAAX_QDMA_COPY_IDX_OFFSET) +#define RTE_DPAAX_QDMA_SG_IDX_ADDR_MASK \ + (RTE_DPAAX_QDMA_SG_IDX_ADDR_ALIGN - 1) +#define RTE_DPAAX_QDMA_SG_SUBMIT(idx_addr, flag) \ + (((uint64_t)idx_addr) | (flag)) + +#define RTE_DPAAX_QDMA_COPY_SUBMIT(idx, flag) \ + ((idx << RTE_DPAAX_QDMA_COPY_IDX_OFFSET) | (flag)) + +#define RTE_DPAAX_QDMA_JOB_SUBMIT_MAX 64 +#define RTE_DMA_CAPA_DPAAX_QDMA_FLAGS_INDEX RTE_BIT64(63) +#endif /* _RTE_PMD_DPAAX_QDMA_H_ */ diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index b70750fede..19d8af9416 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -10,7 +10,7 @@ #include -#include "rte_pmd_dpaa2_qdma.h" +#include #include "dpaa2_qdma.h" #include "dpaa2_qdma_logs.h" @@ -212,16 +212,16 @@ fle_sdd_pre_populate(struct qdma_cntx_fle_sdd *fle_sdd, } /* source frame list to source buffer */ DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src); -#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA - 
DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]); -#endif + /** IOMMU is always on for either VA or PA mode, +* so Bypass Memory Translation should be disabled. +* +* DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]); +* DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]); +*/ fle[DPAA2_QDMA_SRC_FLE].word4.fmt = fmt; /* destination frame list to destination buffer */ DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dest); -#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA - DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]); -#endif fle[DPAA2_QDMA_DST_FLE].word4.fmt = fmt; /* Final bit: 1, for last frame list */ @@ -235,23 +235,21 @@ sg_entry_pre_populate(struct qdma_cntx_sg *sg_cntx) struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry; struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry; - for (i = 0; i < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX; i++) { + for (i = 0; i < RTE_DPAAX_QD
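The diff above is cut off, but the relocated header's macros already shown are enough to illustrate the intent: a driver-private job index is packed into the upper bits of the dmadev flags word. A hedged sketch (assuming the device advertises RTE_DMA_CAPA_DPAAX_QDMA_FLAGS_INDEX; the surrounding setup is hypothetical):

	#include <rte_dmadev.h>
	#include <rte_pmd_dpaax_qdma.h>

	static int submit_indexed_copy(int16_t dev_id, uint16_t vchan,
			rte_iova_t src, rte_iova_t dst,
			uint32_t len, uint16_t job_idx)
	{
		/* job_idx is shifted above bit RTE_DPAAX_QDMA_COPY_IDX_OFFSET
		 * (8), leaving the standard RTE_DMA_OP_FLAG_* bits intact. */
		uint64_t flags = RTE_DPAAX_QDMA_COPY_SUBMIT(job_idx,
				RTE_DMA_OP_FLAG_SUBMIT);

		return rte_dma_copy(dev_id, vchan, src, dst, len, flags);
	}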
[v2 13/30] dma/dpaa: support multi channels
This patch adds support for using multiple DMA channels in the driver. Signed-off-by: Gagandeep Singh --- drivers/dma/dpaa/dpaa_qdma.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 10e65ef1d7..24ad7ad019 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2021 NXP + * Copyright 2021-2024 NXP */ #include @@ -648,8 +648,8 @@ fsl_qdma_alloc_chan_resources(struct fsl_qdma_chan *fsl_chan) } finally: - return fsl_qdma->desc_allocated++; - + fsl_qdma->desc_allocated++; + return 0; exit: return -ENOMEM; } @@ -670,7 +670,7 @@ dpaa_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info, RTE_DMA_CAPA_DEV_TO_MEM | RTE_DMA_CAPA_SILENT | RTE_DMA_CAPA_OPS_COPY; - dev_info->max_vchans = 1; + dev_info->max_vchans = 4; dev_info->max_desc = DPAADMA_MAX_DESC; dev_info->min_desc = DPAADMA_MIN_DESC; -- 2.25.1
[v2 14/30] dma/dpaa: fix job enqueue
The flags check shall be a bitwise AND instead of an equality comparison, since flags may carry other RTE_DMA_OP_FLAG_* bits besides SUBMIT. Fixes: 7da29a644c51 ("dma/dpaa: support DMA operations") Cc: sta...@dpdk.org Signed-off-by: Gagandeep Singh --- drivers/dma/dpaa/dpaa_qdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 24ad7ad019..0a91cf040a 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -615,7 +615,7 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan, list_add_tail(&fsl_comp->list, &fsl_queue->comp_used); - if (flags == RTE_DMA_OP_FLAG_SUBMIT) { + if (flags & RTE_DMA_OP_FLAG_SUBMIT) { reg = qdma_readl_be(block + FSL_QDMA_BCQMR(fsl_queue->id)); reg |= FSL_QDMA_BCQMR_EI_BE; qdma_writel_be(reg, block + FSL_QDMA_BCQMR(fsl_queue->id)); -- 2.25.1
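To see why the equality test misfires, consider a caller that combines SUBMIT with another standard dmadev flag (a minimal illustration):

	#include <rte_dmadev.h>

	static void doorbell_check_demo(void)
	{
		uint64_t flags = RTE_DMA_OP_FLAG_SUBMIT | RTE_DMA_OP_FLAG_LLC;

		/* Old check: false once any extra flag bit is set, so the
		 * doorbell ring is silently skipped. */
		if (flags == RTE_DMA_OP_FLAG_SUBMIT)
			; /* never reached with the flags above */

		/* Fixed check: true whenever the SUBMIT bit is present. */
		if (flags & RTE_DMA_OP_FLAG_SUBMIT)
			; /* doorbell is rung */
	}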
[v2 15/30] dma/dpaa: add burst capacity API
From: Vanshika Shukla This patch improves the dpaa qdma driver and adds dpaa_qdma_burst_capacity API which returns the remaining space in the descriptor ring. Signed-off-by: Vanshika Shukla --- drivers/dma/dpaa/dpaa_qdma.c | 34 +- drivers/dma/dpaa/dpaa_qdma.h | 3 +-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 0a91cf040a..bb6b54e583 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -423,7 +423,6 @@ fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, void *block, int id, const uint16_t nb_cpls, -uint16_t *last_idx, enum rte_dma_status_code *status) { struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue; @@ -457,7 +456,6 @@ fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq) fsl_status->virt_head = fsl_status->cq; qdma_writel_be(reg, block + FSL_QDMA_BSQMR); - *last_idx = fsl_comp->index; if (status != NULL) status[count] = RTE_DMA_STATUS_SUCCESSFUL; @@ -607,7 +605,6 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan, qdma_desc_addr_set64(ccdf, fsl_comp->bus_addr + 16); qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(fsl_comp->virt_addr)); qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(fsl_comp->virt_addr)); - fsl_comp->index = fsl_queue->virt_head - fsl_queue->cq; fsl_queue->virt_head++; if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq) @@ -623,7 +620,7 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan, } else { fsl_queue->pending++; } - return fsl_comp->index; + return 0; } static int @@ -771,8 +768,10 @@ dpaa_qdma_enqueue(void *dev_private, uint16_t vchan, struct fsl_qdma_engine *fsl_qdma = (struct fsl_qdma_engine *)dev_private; struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]]; - int ret; + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + int ret, idx; + idx = (uint16_t)(fsl_queue->stats.submitted + fsl_queue->pending); void *fsl_comp = NULL; fsl_comp = fsl_qdma_prep_memcpy(fsl_chan, @@ -783,8 +782,10 @@ dpaa_qdma_enqueue(void *dev_private, uint16_t vchan, return -1; } ret = fsl_qdma_enqueue_desc(fsl_chan, fsl_comp, flags); + if (ret < 0) + return ret; - return ret; + return idx; } static uint16_t @@ -826,8 +827,10 @@ dpaa_qdma_dequeue_status(void *dev_private, uint16_t vchan, FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id); intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id, nb_cpls, - last_idx, st); + st); fsl_queue->stats.completed += intr; + if (last_idx != NULL) + *last_idx = (uint16_t)(fsl_queue->stats.completed - 1); return intr; } @@ -873,9 +876,10 @@ dpaa_qdma_dequeue(void *dev_private, FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id); intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id, nb_cpls, - last_idx, NULL); + NULL); fsl_queue->stats.completed += intr; - + if (last_idx != NULL) + *last_idx = (uint16_t)(fsl_queue->stats.completed - 1); return intr; } @@ -912,6 +916,17 @@ dpaa_qdma_stats_reset(struct rte_dma_dev *dmadev, uint16_t vchan) return 0; } +static uint16_t +dpaa_qdma_burst_capacity(const void *dev_private, uint16_t vchan) +{ + const struct fsl_qdma_engine *fsl_qdma = dev_private; + struct fsl_qdma_chan *fsl_chan = + &fsl_qdma->chans[fsl_qdma->vchan_map[vchan]]; + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + + return fsl_queue->n_cq - fsl_queue->pending; +} + static struct rte_dma_dev_ops dpaa_qdma_ops = { .dev_info_get = dpaa_info_get, 
.dev_configure= dpaa_qdma_configure, @@ -1035,6 +1050,7 @@ dpaa_qdma_probe(__rte_unused struct rte_dpaa_driver *dpaa_drv, dmadev->fp_obj->submit = dpaa_qdma_submit; dmadev->fp_obj->completed = dpaa_qdma_dequeue; dmadev->fp_obj->completed_status = dpaa_qdma_dequeue_status; + dmadev->fp_obj->burst_capacity = dpaa_qdma_burst_capacity; /* Invoke PMD device initialization function */ ret = dpaa_qdma_init(dmadev); diff --git a/drivers/dma/dpaa/dpaa_qdma.h b/drivers/dma/dpaa/dpaa_qdma.h ind
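The new hook backs the generic rte_dma_burst_capacity() call; a typical application-side use (a sketch, with dev_id/vchan setup omitted):

	#include <stdbool.h>
	#include <rte_dmadev.h>

	/* Enqueue a burst only if the descriptor ring can hold all of it;
	 * otherwise reap completions first. */
	static bool ring_has_room(int16_t dev_id, uint16_t vchan, uint16_t burst)
	{
		return rte_dma_burst_capacity(dev_id, vchan) >= burst;
	}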
[v2 16/30] dma/dpaa: add workaround for ERR050757
ERR050757 on LS104x indicates: For outbound PCIe read transactions, a completion buffer is used to store the PCIe completions till the data is passed back to the initiator. At most 16 outstanding transactions are allowed and the maximum read request is 256 bytes. The completion buffer size inside the controller needs to be at least 4 KB, but the PCIe controller has 3 KB of buffer. If the size of pending outbound read transactions exceeds 3 KB, the PCIe controller may drop incoming completions without notifying the initiator, leaving transactions unfinished. All subsequent outbound reads to PCIe are then blocked permanently. To avoid a qDMA hang while it waits for data that was silently dropped, set stride mode for qDMA. Signed-off-by: Gagandeep Singh --- config/arm/meson.build | 3 ++- doc/guides/dmadevs/dpaa.rst | 2 ++ drivers/dma/dpaa/dpaa_qdma.c | 18 ++ drivers/dma/dpaa/dpaa_qdma.h | 5 + 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/config/arm/meson.build b/config/arm/meson.build index 012935d5d7..f81e466318 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -468,7 +468,8 @@ soc_dpaa = { ['RTE_MACHINE', '"dpaa"'], ['RTE_LIBRTE_DPAA2_USE_PHYS_IOVA', false], ['RTE_MAX_LCORE', 16], -['RTE_MAX_NUMA_NODES', 1] +['RTE_MAX_NUMA_NODES', 1], + ['RTE_DMA_DPAA_ERRATA_ERR050757', true] ], 'numa': false } diff --git a/doc/guides/dmadevs/dpaa.rst b/doc/guides/dmadevs/dpaa.rst index f99bfc6087..746919ec6b 100644 --- a/doc/guides/dmadevs/dpaa.rst +++ b/doc/guides/dmadevs/dpaa.rst @@ -42,6 +42,8 @@ Compilation For builds using ``meson`` and ``ninja``, the driver will be built when the target platform is dpaa-based. No additional compilation steps are necessary. +- ``RTE_DMA_DPAA_ERRATA_ERR050757`` - enable software workaround for Errata-A050757 + Initialization -- diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index bb6b54e583..a21279293c 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -159,6 +159,10 @@ fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, dma_addr_t dst, dma_addr_t src, u32 len) { struct fsl_qdma_format *csgf_src, *csgf_dest; +#ifdef RTE_DMA_DPAA_ERRATA_ERR050757 + struct fsl_qdma_sdf *sdf; + u32 cfg = 0; +#endif /* Note: command table (fsl_comp->virt_addr) is getting filled * directly in cmd descriptors of queues while enqueuing the descriptor @@ -171,6 +175,20 @@ fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, csgf_src = (struct fsl_qdma_format *)fsl_comp->virt_addr + 2; csgf_dest = (struct fsl_qdma_format *)fsl_comp->virt_addr + 3; +#ifdef RTE_DMA_DPAA_ERRATA_ERR050757 + sdf = (struct fsl_qdma_sdf *)fsl_comp->desc_virt_addr; + sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE << + FSL_QDMA_CMD_RWTTYPE_OFFSET); + if (len > FSL_QDMA_CMD_SSS_DISTANCE) { + sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSEN); + cfg |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSS_STRIDE << + FSL_QDMA_CFG_SSS_OFFSET | + FSL_QDMA_CMD_SSS_DISTANCE); + sdf->cfg = cfg; + } else + sdf->cfg = 0; +#endif + /* Status notification is enqueued to status queue.
*/ qdma_desc_addr_set64(csgf_src, src); qdma_csgf_set_len(csgf_src, len); diff --git a/drivers/dma/dpaa/dpaa_qdma.h b/drivers/dma/dpaa/dpaa_qdma.h index 2092fb39f5..361f88856b 100644 --- a/drivers/dma/dpaa/dpaa_qdma.h +++ b/drivers/dma/dpaa/dpaa_qdma.h @@ -81,6 +81,11 @@ #define FSL_QDMA_CMD_RWTTYPE_OFFSET28 #define FSL_QDMA_CMD_LWC_OFFSET16 +#define FSL_QDMA_CMD_SSEN BIT(19) +#define FSL_QDMA_CFG_SSS_OFFSET12 +#define FSL_QDMA_CMD_SSS_STRIDE128 +#define FSL_QDMA_CMD_SSS_DISTANCE 128 + #define QDMA_CCDF_STATUS 20 #define QDMA_CCDF_OFFSET 20 #define QDMA_CCDF_MASK GENMASK(28, 20) -- 2.25.1
[v2 21/30] dma/dpaa: dequeue status queue
From: Jun Yang To support multiple command queues of each block, status queue need support to notify which command queue of block is completed. The multiple command queues are balanced to blocks in setup. If multiple command queues are enabled in one block, de-queue status is performed instead of checking completion. DQ operation is not performed in silent mode. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 351 +-- drivers/dma/dpaa/dpaa_qdma.h | 38 +++- 2 files changed, 242 insertions(+), 147 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index dc17aa4520..825dead5cf 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -34,6 +34,30 @@ qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status) ccdf->status = rte_cpu_to_le_32(QDMA_CCDF_SER | status); } +static inline void +qdma_ccdf_set_queue(struct fsl_qdma_format *ccdf, + uint8_t queue_idx) +{ + ccdf->queue = queue_idx; +} + +static inline int +qdma_ccdf_get_queue(struct fsl_qdma_format *ccdf, + uint8_t *queue_idx) +{ + uint64_t addr = ((uint64_t)ccdf->addr_hi) << 32 | ccdf->addr_lo; + + if (addr && queue_idx) + *queue_idx = ccdf->queue; + if (addr) { + ccdf->addr_hi = 0; + ccdf->addr_lo = 0; + return true; + } + + return false; +} + static inline void qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len) { @@ -110,7 +134,8 @@ dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr) if (!virt_addr) return NULL; - *phy_addr = rte_mem_virt2iova(virt_addr); + if (phy_addr) + *phy_addr = rte_mem_virt2iova(virt_addr); return virt_addr; } @@ -121,6 +146,7 @@ dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr) static int fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) { + struct fsl_qdma_engine *fsl_qdma = queue->engine; struct fsl_qdma_sdf *sdf; struct fsl_qdma_ddf *ddf; struct fsl_qdma_format *ccdf; @@ -175,7 +201,9 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) ccdf = queue->cmd_desc; qdma_desc_addr_set64(ccdf, phy_ft); qdma_ccdf_set_format(ccdf, 0); - qdma_ccdf_set_ser(ccdf, 0); + if (!fsl_qdma->is_silent) + qdma_ccdf_set_ser(ccdf, 0); + qdma_ccdf_set_queue(ccdf, queue->queue_id); queue->cmd_desc++; } queue->cmd_desc = head; @@ -192,105 +220,91 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) return -ENOMEM; } -static struct fsl_qdma_queue * +static int fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma, int queue_id, int block_id) { - struct fsl_qdma_queue *queue_temp; + struct fsl_qdma_queue *cmd_queue; uint32_t queue_size; - queue_temp = rte_zmalloc("qdma: queue head", - sizeof(struct fsl_qdma_queue), 0); - if (!queue_temp) { - DPAA_QDMA_ERR("no memory to allocate queues\n"); - return NULL; - } + cmd_queue = &fsl_qdma->cmd_queues[block_id][queue_id]; + cmd_queue->engine = fsl_qdma; + queue_size = sizeof(struct fsl_qdma_format) * QDMA_QUEUE_SIZE; - queue_temp->cq = dma_pool_alloc(queue_size, - queue_size, &queue_temp->bus_addr); - if (!queue_temp->cq) { - rte_free(queue_temp); - return NULL; - } + cmd_queue->cq = dma_pool_alloc(queue_size, + queue_size, &cmd_queue->bus_addr); + if (!cmd_queue->cq) + return -ENOMEM; - memset(queue_temp->cq, 0x0, queue_size); + memset(cmd_queue->cq, 0x0, queue_size); - queue_temp->block_vir = fsl_qdma->block_base + + cmd_queue->block_vir = fsl_qdma->block_base + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id); - queue_temp->n_cq = QDMA_QUEUE_SIZE; - queue_temp->queue_id = queue_id; - queue_temp->pending = 0; - queue_temp->cmd_desc = queue_temp->cq; - queue_temp->ft = 
rte_malloc("Compound Frame Table", + cmd_queue->n_cq = QDMA_QUEUE_SIZE; + cmd_queue->queue_id = queue_id; + cmd_queue->block_id = block_id; + cmd_queue->pending = 0; + cmd_queue->cmd_desc = cmd_queue->cq; + cmd_queue->ft = rte_malloc("Compound Frame Table", sizeof(void *) * QDMA_QUEUE_SIZE, 0); - if (!queue_temp->ft) { - rte_free(queue_temp->cq); - rte_free(queue_temp); - return NULL; + if (!cmd_queue->ft) { + rte_free(cmd_queue->cq); + return -ENOMEM; } - queue_temp->df = rte_malloc("Descriptor Buffer", + cmd_queue->df = rte_malloc("Descriptor Buffer", sizeof(
[v2 22/30] dma/dpaa: add Scatter Gather support
From: Jun Yang Perform SG operation by copy_sg callback of DMA lib or burst request from application. Perform Simple operation if burst number is 1. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 856 ++- drivers/dma/dpaa/dpaa_qdma.h | 184 +--- 2 files changed, 763 insertions(+), 277 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 825dead5cf..f1ad60d1f2 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -4,45 +4,31 @@ #include #include +#include #include "dpaa_qdma.h" #include "dpaa_qdma_logs.h" +static int s_data_validation; +static int s_hw_err_check; +static int s_sg_disable; + static inline void -qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr) +qdma_desc_addr_set64(struct fsl_qdma_comp_cmd_desc *ccdf, u64 addr) { ccdf->addr_hi = upper_32_bits(addr); ccdf->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr)); } static inline void -qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset) -{ - ccdf->cfg = rte_cpu_to_le_32(QDMA_CCDF_FOTMAT | offset); -} - -static inline int -qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf) -{ - return (rte_le_to_cpu_32(ccdf->status) & QDMA_CCDF_MASK) - >> QDMA_CCDF_STATUS; -} - -static inline void -qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status) +qdma_desc_sge_addr_set64(struct fsl_qdma_comp_sg_desc *sge, u64 addr) { - ccdf->status = rte_cpu_to_le_32(QDMA_CCDF_SER | status); -} - -static inline void -qdma_ccdf_set_queue(struct fsl_qdma_format *ccdf, - uint8_t queue_idx) -{ - ccdf->queue = queue_idx; + sge->addr_hi = upper_32_bits(addr); + sge->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr)); } static inline int -qdma_ccdf_get_queue(struct fsl_qdma_format *ccdf, +qdma_ccdf_get_queue(struct fsl_qdma_comp_cmd_desc *ccdf, uint8_t *queue_idx) { uint64_t addr = ((uint64_t)ccdf->addr_hi) << 32 | ccdf->addr_lo; @@ -58,18 +44,6 @@ qdma_ccdf_get_queue(struct fsl_qdma_format *ccdf, return false; } -static inline void -qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len) -{ - csgf->cfg = rte_cpu_to_le_32(len & QDMA_SG_LEN_MASK); -} - -static inline void -qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len) -{ - csgf->cfg = rte_cpu_to_le_32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK)); -} - static inline int ilog2(int x) { int log = 0; @@ -126,11 +100,11 @@ qdma_writel_be(uint32_t val, void *addr) } static void * -dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr) +dma_pool_alloc(char *nm, int size, int aligned, dma_addr_t *phy_addr) { void *virt_addr; - virt_addr = rte_malloc("dma pool alloc", size, aligned); + virt_addr = rte_zmalloc(nm, size, aligned); if (!virt_addr) return NULL; @@ -149,28 +123,46 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) struct fsl_qdma_engine *fsl_qdma = queue->engine; struct fsl_qdma_sdf *sdf; struct fsl_qdma_ddf *ddf; - struct fsl_qdma_format *ccdf; + struct fsl_qdma_comp_cmd_desc *ccdf; uint16_t i, j; - struct fsl_qdma_format *head; struct fsl_qdma_cmpd_ft *ft; struct fsl_qdma_df *df; - head = queue->cmd_desc; - for (i = 0; i < queue->n_cq; i++) { dma_addr_t phy_ft = 0, phy_df = 0; - queue->ft[i] = - dma_pool_alloc(sizeof(struct fsl_qdma_cmpd_ft), - RTE_CACHE_LINE_SIZE, &phy_ft); + queue->ft[i] = dma_pool_alloc(NULL, + sizeof(struct fsl_qdma_cmpd_ft), + RTE_CACHE_LINE_SIZE, &phy_ft); if (!queue->ft[i]) goto fail; - - queue->df[i] = - dma_pool_alloc(sizeof(struct fsl_qdma_df), - RTE_CACHE_LINE_SIZE, &phy_df); + if (((uint64_t)queue->ft[i]) & + (RTE_CACHE_LINE_SIZE - 1)) { + DPAA_QDMA_ERR("FD[%d] 
addr(%p) not cache aligned", + i, queue->ft[i]); + rte_free(queue->ft[i]); + queue->ft[i] = NULL; + goto fail; + } + if (((uint64_t)(&queue->ft[i]->desc_ssge[0])) & + (RTE_CACHE_LINE_SIZE - 1)) { + DPAA_QDMA_ERR("FD[%d] SGE addr(%p) not cache aligned", + i, &queue->ft[i]->desc_ssge[0]); + rte_free(queue->ft[i]); + queue->ft[i] = NULL; + goto fail; + } + queue->ft[i]->phy_ssge = phy_ft + + offsetof(struct fsl_qdma_cmpd_ft, desc_ssge); + queue->ft[i]->phy_dsge = phy_ft + +
[v2 24/30] dma/dpaa: improve congestion handling
From: Jun Yang The congestion issue occurs frequently on low speed device(PCIe). We should drain the command queue to make dma work when congestion occurs. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 157 +++ 1 file changed, 85 insertions(+), 72 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index de5ecc7d0b..eaa5f81f6d 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -535,73 +535,6 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma) return 0; } -static int -fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue, - int is_burst) -{ - uint16_t i, num = fsl_queue->pending_num, idx, start; - int ret; - - num = is_burst ? fsl_queue->pending_num : 1; - - fsl_queue->desc_in_hw[fsl_queue->ci] = num; - ret = rte_ring_enqueue(fsl_queue->complete_burst, - &fsl_queue->desc_in_hw[fsl_queue->ci]); - if (ret) { - DPAA_QDMA_ERR("%s: Queue is full, try dequeue first", - __func__); - DPAA_QDMA_ERR("%s: submitted:%"PRIu64", completed:%"PRIu64"", - __func__, fsl_queue->stats.submitted, - fsl_queue->stats.completed); - return ret; - } - start = fsl_queue->pending_start; - for (i = 0; i < num; i++) { - idx = (start + i) & (fsl_queue->pending_max - 1); - ret = rte_ring_enqueue(fsl_queue->complete_desc, - &fsl_queue->pending_desc[idx]); - if (ret) { - DPAA_QDMA_ERR("Descriptors eq failed!\r\n"); - return ret; - } - } - - return 0; -} - -static int -fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, - dma_addr_t dst, dma_addr_t src, size_t len) -{ - uint8_t *block = fsl_queue->block_vir; - struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest; - struct fsl_qdma_cmpd_ft *ft; - int ret; - - ft = fsl_queue->ft[fsl_queue->ci]; - csgf_src = &ft->desc_sbuf; - csgf_dest = &ft->desc_dbuf; - qdma_desc_sge_addr_set64(csgf_src, src); - csgf_src->length = len; - csgf_src->extion = 0; - qdma_desc_sge_addr_set64(csgf_dest, dst); - csgf_dest->length = len; - csgf_dest->extion = 0; - /* This entry is the last entry. */ - csgf_dest->final = 1; - - ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 0); - if (ret) - return ret; - fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1); - - qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI, - block + FSL_QDMA_BCQMR(fsl_queue->queue_id)); - fsl_queue->stats.submitted++; - - return 0; -} - static uint16_t dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma, uint8_t block_id) @@ -633,7 +566,6 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma, ret = qdma_ccdf_get_queue(&cq[start], &qid); if (ret == true) { cmd_queue = &fsl_qdma->cmd_queues[block_id][qid]; - cmd_queue->stats.completed++; ret = rte_ring_dequeue(cmd_queue->complete_burst, (void **)&dq_complete); @@ -677,6 +609,87 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma, return count; } +static int +fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue, + int is_burst) +{ + uint16_t i, num = fsl_queue->pending_num, idx, start, dq; + int ret, dq_cnt; + + num = is_burst ? 
fsl_queue->pending_num : 1; + + fsl_queue->desc_in_hw[fsl_queue->ci] = num; +eq_again: + ret = rte_ring_enqueue(fsl_queue->complete_burst, + &fsl_queue->desc_in_hw[fsl_queue->ci]); + if (ret) { + DPAA_QDMA_DP_DEBUG("%s: Queue is full, try dequeue first", + __func__); + DPAA_QDMA_DP_DEBUG("%s: submitted:%"PRIu64", completed:%"PRIu64"", + __func__, fsl_queue->stats.submitted, + fsl_queue->stats.completed); + dq_cnt = 0; +dq_again: + dq = dpaa_qdma_block_dequeue(fsl_queue->engine, + fsl_queue->block_id); + dq_cnt++; + if (dq > 0) { + goto eq_again; + } else { + if (dq_cnt < 100) + goto dq_again; + DPAA_QDMA_ERR("%s: Dq block%d failed!", + __func__, fsl_queue->block_id); + } + return ret; + } + start = fsl_queue->pending_start; + for (i = 0; i < num; i++) { + idx = (start + i) & (fsl_queue->pending_max - 1); + ret = rte_r
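Condensed, the retry logic this hunk adds reads as follows (a summary of the code above with logging trimmed, not a replacement for it):

	eq_again:
		ret = rte_ring_enqueue(fsl_queue->complete_burst,
				&fsl_queue->desc_in_hw[fsl_queue->ci]);
		if (ret) {
			/* Ring full: drain completed jobs from this block,
			 * then retry the enqueue; give up after 100 empty
			 * dequeue polls. */
			for (dq_cnt = 0; dq_cnt < 100; dq_cnt++) {
				dq = dpaa_qdma_block_dequeue(fsl_queue->engine,
						fsl_queue->block_id);
				if (dq > 0)
					goto eq_again;
			}
			return ret;
		}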
[v2 23/30] dma/dpaa: block dequeue
From: Jun Yang Perform block dequeue to identify which queue of this block is completed. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 252 --- 1 file changed, 116 insertions(+), 136 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index f1ad60d1f2..de5ecc7d0b 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -442,86 +442,6 @@ fsl_qdma_data_validation(struct fsl_qdma_desc *desc[], } } -static int -fsl_qdma_queue_drain(struct fsl_qdma_queue *fsl_queue) -{ - uint32_t reg; - int count = 0, ret; - uint8_t *block = fsl_queue->block_vir; - uint16_t *dq_complete = NULL, drain_num = 0; - struct fsl_qdma_desc *desc[FSL_QDMA_SG_MAX_ENTRY]; - - while (1) { - if (rte_ring_free_count(fsl_queue->complete_pool) < - (FSL_QDMA_SG_MAX_ENTRY * 2)) - break; - reg = qdma_readl_be(block + FSL_QDMA_BSQSR); - if (reg & FSL_QDMA_BSQSR_QE_BE) - break; - - qdma_writel_be(FSL_QDMA_BSQMR_DI, block + FSL_QDMA_BSQMR); - ret = rte_ring_dequeue(fsl_queue->complete_burst, - (void **)&dq_complete); - if (ret) - DPAA_QDMA_ERR("DQ desc number failed!\n"); - - ret = rte_ring_dequeue_bulk(fsl_queue->complete_desc, - (void **)desc, *dq_complete, NULL); - if (ret != (*dq_complete)) { - DPAA_QDMA_ERR("DQ %d descs failed!(%d)\n", - *dq_complete, ret); - } - - fsl_qdma_data_validation(desc, *dq_complete, fsl_queue); - - ret = rte_ring_enqueue_bulk(fsl_queue->complete_pool, - (void **)desc, (*dq_complete), NULL); - if (ret != (*dq_complete)) { - DPAA_QDMA_ERR("EQ %d descs to return queue failed!(%d)\n", - *dq_complete, ret); - } - - drain_num += *dq_complete; - fsl_queue->complete_start = - (fsl_queue->complete_start + (*dq_complete)) & - (fsl_queue->pending_max - 1); - fsl_queue->stats.completed++; - - count++; - } - - return drain_num; -} - -static int -fsl_qdma_queue_transfer_complete(struct fsl_qdma_queue *fsl_queue, - const uint16_t nb_cpls, uint16_t *last_idx, - enum rte_dma_status_code *status) -{ - int ret; - uint16_t dq_num = 0, i; - struct fsl_qdma_desc *desc_complete[nb_cpls]; - - ret = fsl_qdma_queue_drain(fsl_queue); - if (ret < 0) { - DPAA_QDMA_ERR("Drain TX%d/Q%d failed!(%d)", - fsl_queue->block_id, fsl_queue->queue_id, - ret); - } - - dq_num = rte_ring_dequeue_burst(fsl_queue->complete_pool, - (void **)desc_complete, nb_cpls, NULL); - for (i = 0; i < dq_num; i++) - last_idx[i] = desc_complete[i]->flag; - - if (status) { - for (i = 0; i < dq_num; i++) - status[i] = RTE_DMA_STATUS_SUCCESSFUL; - } - - return dq_num; -} - static int fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma) { @@ -682,13 +602,90 @@ fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, return 0; } +static uint16_t +dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma, + uint8_t block_id) +{ + struct fsl_qdma_status_queue *stat_queue; + struct fsl_qdma_queue *cmd_queue; + struct fsl_qdma_comp_cmd_desc *cq; + uint16_t start, count = 0; + uint8_t qid = 0; + uint32_t reg; + int ret; + uint8_t *block; + uint16_t *dq_complete = NULL; + struct fsl_qdma_desc *desc[FSL_QDMA_SG_MAX_ENTRY]; + + stat_queue = &fsl_qdma->stat_queues[block_id]; + cq = stat_queue->cq; + start = stat_queue->complete; + + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id); + + do { + reg = qdma_readl_be(block + FSL_QDMA_BSQSR); + if (reg & FSL_QDMA_BSQSR_QE_BE) + break; + + qdma_writel_be(FSL_QDMA_BSQMR_DI, block + FSL_QDMA_BSQMR); + ret = qdma_ccdf_get_queue(&cq[start], &qid); + if (ret == true) { + cmd_queue = &fsl_qdma->cmd_queues[block_id][qid]; + 
cmd_queue->stats.completed++; + + ret = rte_ring_dequeue(cmd_queue->complete_burst, + (void **)&dq_complete); + if (ret) + DPAA_QDMA_ERR("DQ desc number failed!\n"); + + ret = rte_ring_dequeue_bulk(cmd_queue->complete_desc, +
[v2 25/30] dma/dpaa: disable SG descriptor as default
From: Jun Yang Use burst operation for SG copy by default (s_sg_disable now defaults to 1) until the SG issue is fixed. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index eaa5f81f6d..8492d0de5b 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -11,7 +11,7 @@ static int s_data_validation; static int s_hw_err_check; -static int s_sg_disable; +static int s_sg_disable = 1; static inline void qdma_desc_addr_set64(struct fsl_qdma_comp_cmd_desc *ccdf, u64 addr) @@ -335,7 +335,6 @@ fsl_qdma_free_stq_res(struct fsl_qdma_status_queue *queue) rte_free(queue->cq); } - static int fsl_qdma_prep_status_queue(struct fsl_qdma_engine *fsl_qdma, uint32_t block_id) -- 2.25.1
[v2 26/30] dma/dpaa: improve ERRATA workaround solution
From: Jun Yang Fix issue of ERRATA 050757/050265 workaround which is not effective in burst mode. SDF/DDF is referred by first entry of compound frame table, move the DF to compound frame table description which is suitable to adapt single copy and SG/burst copy. Fix SG issue which was caused by memset clearing phy address of SGE in compound frame table. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 215 +-- drivers/dma/dpaa/dpaa_qdma.h | 7 +- 2 files changed, 107 insertions(+), 115 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 8492d0de5b..5d91ad2d70 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -11,7 +11,10 @@ static int s_data_validation; static int s_hw_err_check; -static int s_sg_disable = 1; +static int s_sg_enable = 1; +#ifdef RTE_DMA_DPAA_ERRATA_ERR050757 +static int s_pci_read = 1; +#endif static inline void qdma_desc_addr_set64(struct fsl_qdma_comp_cmd_desc *ccdf, u64 addr) @@ -126,10 +129,9 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) struct fsl_qdma_comp_cmd_desc *ccdf; uint16_t i, j; struct fsl_qdma_cmpd_ft *ft; - struct fsl_qdma_df *df; for (i = 0; i < queue->n_cq; i++) { - dma_addr_t phy_ft = 0, phy_df = 0; + dma_addr_t phy_ft = 0; queue->ft[i] = dma_pool_alloc(NULL, sizeof(struct fsl_qdma_cmpd_ft), @@ -156,25 +158,14 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) offsetof(struct fsl_qdma_cmpd_ft, desc_ssge); queue->ft[i]->phy_dsge = phy_ft + offsetof(struct fsl_qdma_cmpd_ft, desc_dsge); - - queue->df[i] = dma_pool_alloc(NULL, - sizeof(struct fsl_qdma_df), - RTE_CACHE_LINE_SIZE, &phy_df); - if (!queue->df[i]) { - rte_free(queue->ft[i]); - queue->ft[i] = NULL; - goto fail; - } - - memset(queue->ft[i], 0, sizeof(struct fsl_qdma_cmpd_ft)); - memset(queue->df[i], 0, sizeof(struct fsl_qdma_df)); + queue->ft[i]->phy_df = phy_ft + + offsetof(struct fsl_qdma_cmpd_ft, df); ft = queue->ft[i]; - df = queue->df[i]; - sdf = &df->sdf; - ddf = &df->ddf; + sdf = &ft->df.sdf; + ddf = &ft->df.ddf; /* Compound Command Descriptor(Frame List Table) */ - qdma_desc_sge_addr_set64(&ft->desc_buf, phy_df); + qdma_desc_sge_addr_set64(&ft->desc_buf, ft->phy_df); /* It must be 32 as Compound S/G Descriptor */ ft->desc_buf.length = sizeof(struct fsl_qdma_df); @@ -198,10 +189,8 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) return 0; fail: - for (j = 0; j < i; j++) { + for (j = 0; j < i; j++) rte_free(queue->ft[j]); - rte_free(queue->df[j]); - } return -ENOMEM; } @@ -247,23 +236,12 @@ fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma, rte_free(cmd_queue->cq); return -ENOMEM; } - sprintf(nm, "Descriptor Buf_%d_%d", - block_id, queue_id); - cmd_queue->df = rte_zmalloc(nm, - sizeof(void *) * QDMA_QUEUE_SIZE, 0); - if (!cmd_queue->df) { - DPAA_QDMA_ERR("%s zmalloc failed!", nm); - rte_free(cmd_queue->ft); - rte_free(cmd_queue->cq); - return -ENOMEM; - } sprintf(nm, "Pending_desc_%d_%d", block_id, queue_id); cmd_queue->pending_desc = rte_zmalloc(nm, sizeof(struct fsl_qdma_desc) * FSL_QDMA_MAX_DESC_NUM, 0); if (!cmd_queue->pending_desc) { DPAA_QDMA_ERR("%s zmalloc failed!", nm); - rte_free(cmd_queue->df); rte_free(cmd_queue->ft); rte_free(cmd_queue->cq); return -ENOMEM; @@ -278,7 +256,6 @@ fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma, rte_free(cmd_queue->pending_desc); rte_free(cmd_queue->ft); rte_free(cmd_queue->cq); - rte_free(cmd_queue->df); return -ENOMEM; } sprintf(nm, "complete-desc_ring_%d_%d", @@ -292,7 +269,6 @@ 
fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma, rte_free(cmd_queue->pending_desc); rte_free(cmd_queue->ft); rte_free(cmd_queue->cq); - rte_free(cmd_queue->df); return -ENOMEM; } sprintf(nm, "complete-pool-desc_ring_%d_%d", @@ -307,7 +283,6 @@ fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *f
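The net effect of this patch is easiest to see as a data-structure change: the SDF/DDF descriptor pair moves out of its own DMA pool and into the compound frame table, so a single allocation per command covers both and the first frame-table entry can point at the embedded copy. A sketch of the assumed resulting layout (field names come from the diff; member order and the omitted fields are guesses):

    struct fsl_qdma_cmpd_ft {
        struct fsl_qdma_comp_sg_desc desc_buf;   /* points at df below */
        struct fsl_qdma_comp_sg_desc desc_sbuf;
        struct fsl_qdma_comp_sg_desc desc_dbuf;
        struct fsl_qdma_comp_sg_desc desc_ssge[FSL_QDMA_SG_MAX_ENTRY];
        struct fsl_qdma_comp_sg_desc desc_dsge[FSL_QDMA_SG_MAX_ENTRY];
        struct fsl_qdma_df df;      /* SDF + DDF, formerly a pool alloc */
        uint64_t phy_df;            /* phy_ft + offsetof(..., df) */
    };

With the DF living inside the table, there is no longer a separate df object to memset, which lines up with the SG fix mentioned in the commit message and the removal of the memset() calls in the hunk above.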
[v2 27/30] dma/dpaa: improve silent mode support
From: Jun Yang

In silent mode, don't save the enqueue context; check the HW status
only to identify whether the queue is full.

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa/dpaa_qdma.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index 5d91ad2d70..8f5b6c6ea5 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -586,9 +586,13 @@ static int
 fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue,
 	int is_burst)
 {
+	struct fsl_qdma_engine *fsl_qdma = fsl_queue->engine;
 	uint16_t i, num = fsl_queue->pending_num, idx, start, dq;
 	int ret, dq_cnt;
 
+	if (fsl_qdma->is_silent)
+		return 0;
+
 	num = is_burst ? fsl_queue->pending_num : 1;
 
 	fsl_queue->desc_in_hw[fsl_queue->ci] = num;
@@ -697,7 +701,7 @@ fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue)
 
 	check_num = 0;
 overflow_check:
-	if (unlikely(s_hw_err_check)) {
+	if (fsl_qdma->is_silent || unlikely(s_hw_err_check)) {
 		reg = qdma_readl_be(block +
 			FSL_QDMA_BCQSR(fsl_queue->queue_id));
 		overflow = (reg & FSL_QDMA_BCQSR_QF_XOFF_BE) ?
@@ -707,8 +711,14 @@ fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue)
 			QDMA_QUEUE_CR_WM) ? 1 : 0;
 	}
 
-	if (likely(!overflow))
+	if (likely(!overflow)) {
 		return 0;
+	} else if (fsl_qdma->is_silent) {
+		check_num++;
+		if (check_num < 1000)
+			goto overflow_check;
+		return -ENOSPC;
+	}
 
 	DPAA_QDMA_DP_DEBUG("TC%d/Q%d submitted(%"PRIu64")-completed(%"PRIu64") >= %d",
 		fsl_queue->block_id, fsl_queue->queue_id,
-- 
2.25.1
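Because silent mode has no completion ring to drain, the only back-pressure signal left is the hardware XOFF bit, which the patch polls a bounded number of times before giving up. Factored out as a sketch (register names from the diff; the loop structure is condensed and illustrative):

    /* Sketch: bounded busy-wait used in silent mode. Returns 0 once HW
     * drains below XOFF, -ENOSPC after 1000 polls (per the hunk above).
     */
    static int
    silent_wait_sketch(struct fsl_qdma_queue *q)
    {
        uint8_t *block = q->block_vir;
        int check_num = 0;

        while (qdma_readl_be(block + FSL_QDMA_BCQSR(q->queue_id)) &
                FSL_QDMA_BCQSR_QF_XOFF_BE) {
            if (++check_num >= 1000)
                return -ENOSPC;
        }
        return 0;
    }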
[v2 28/30] dma/dpaa: support multiple SG copies
From: Jun Yang Split burst copies to multiple SG copies if burst number exceeds max number of SG entries. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 180 +++ drivers/dma/dpaa/dpaa_qdma.h | 2 +- 2 files changed, 98 insertions(+), 84 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 8f5b6c6ea5..383142fc75 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -584,17 +584,15 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma, static int fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue, - int is_burst) + uint16_t num) { struct fsl_qdma_engine *fsl_qdma = fsl_queue->engine; - uint16_t i, num = fsl_queue->pending_num, idx, start, dq; + uint16_t i, idx, start, dq; int ret, dq_cnt; if (fsl_qdma->is_silent) return 0; - num = is_burst ? fsl_queue->pending_num : 1; - fsl_queue->desc_in_hw[fsl_queue->ci] = num; eq_again: ret = rte_ring_enqueue(fsl_queue->complete_burst, @@ -634,6 +632,69 @@ fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue, return 0; } +static int +fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue) +{ + int overflow = 0; + uint32_t reg; + uint16_t blk_drain, check_num, drain_num; + uint8_t *block = fsl_queue->block_vir; + const struct rte_dma_stats *st = &fsl_queue->stats; + struct fsl_qdma_engine *fsl_qdma = fsl_queue->engine; + + check_num = 0; +overflow_check: + if (fsl_qdma->is_silent || unlikely(s_hw_err_check)) { + reg = qdma_readl_be(block + +FSL_QDMA_BCQSR(fsl_queue->queue_id)); + overflow = (reg & FSL_QDMA_BCQSR_QF_XOFF_BE) ? + 1 : 0; + } else { + overflow = (fsl_qdma_queue_bd_in_hw(fsl_queue) >= + QDMA_QUEUE_CR_WM) ? 1 : 0; + } + + if (likely(!overflow)) { + return 0; + } else if (fsl_qdma->is_silent) { + check_num++; + if (check_num >= 1) { + DPAA_QDMA_WARN("Waiting for HW complete in silent mode"); + check_num = 0; + } + goto overflow_check; + } + + DPAA_QDMA_DP_DEBUG("TC%d/Q%d submitted(%"PRIu64")-completed(%"PRIu64") >= %d", + fsl_queue->block_id, fsl_queue->queue_id, + st->submitted, st->completed, QDMA_QUEUE_CR_WM); + drain_num = 0; + +drain_again: + blk_drain = dpaa_qdma_block_dequeue(fsl_qdma, + fsl_queue->block_id); + if (!blk_drain) { + drain_num++; + if (drain_num >= 1) { + DPAA_QDMA_WARN("TC%d failed drain, Q%d's %"PRIu64" bd in HW.", + fsl_queue->block_id, fsl_queue->queue_id, + st->submitted - st->completed); + drain_num = 0; + } + goto drain_again; + } + check_num++; + if (check_num >= 1000) { + DPAA_QDMA_WARN("TC%d failed check, Q%d's %"PRIu64" bd in HW.", + fsl_queue->block_id, fsl_queue->queue_id, + st->submitted - st->completed); + check_num = 0; + } + goto overflow_check; + + return 0; +} + static int fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, dma_addr_t dst, dma_addr_t src, size_t len) @@ -646,6 +707,10 @@ fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, struct fsl_qdma_sdf *sdf; #endif + ret = fsl_qdma_enqueue_overflow(fsl_queue); + if (unlikely(ret)) + return ret; + ft = fsl_queue->ft[fsl_queue->ci]; #ifdef RTE_DMA_DPAA_ERRATA_ERR050757 @@ -677,7 +742,7 @@ fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, /* This entry is the last entry. 
*/ csgf_dest->final = 1; - ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 0); + ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 1); if (ret) return ret; fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1); @@ -689,81 +754,30 @@ fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue, return 0; } -static int -fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue) -{ - int overflow = 0; - uint32_t reg; - uint16_t blk_drain, check_num, drain_num; - uint8_t *block = fsl_queue->block_vir; - const struct rte_dma_stats *st = &fsl_queue->stats; - struct fsl_qdma_engine *fsl_qdma = fsl_queue->engine; - - check_num = 0; -overflow_check: - if (fsl_qdma->is_silent || unlikely(s_hw_err_check)) { - reg = qdma_readl_be(block + -FSL_QDMA_BCQSR(fsl_queue->queue_id))
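With this change the public burst size is decoupled from the hardware SG limit: fsl_qdma_enqueue_desc_sg() now consumes at most FSL_QDMA_SG_MAX_ENTRY pending descriptors per compound frame and loops until the pending list is empty. A condensed sketch of that split loop (illustrative only; build_and_submit_one_ft() is a hypothetical helper standing in for the frame-table build shown in the diff):

    /* Sketch: split a large pending burst into several SG copies. */
    static int
    sg_split_sketch(struct fsl_qdma_queue *q)
    {
        uint16_t num;
        int ret;

        while (q->pending_num > 0) {
            num = q->pending_num > FSL_QDMA_SG_MAX_ENTRY ?
                FSL_QDMA_SG_MAX_ENTRY : q->pending_num;
            ret = build_and_submit_one_ft(q, num); /* hypothetical helper */
            if (ret)
                return ret;
            q->pending_start = (q->pending_start + num) &
                (q->pending_max - 1);
            q->pending_num -= num;
        }
        return 0;
    }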
[v2 29/30] dma/dpaa: support max SG entry size
From: Jun Yang

SG transactions do not help performance for large transfer sizes, so
start a single transaction for any entry whose size exceeds the max SG
entry size in an SG copy. The default max SG entry size is 2000 bytes,
which comes from mem-to-mem measurements; users can tune it to their
own experiments:
export DPAA_QDMA_SG_MAX_ENTRY_SIZE=xxx

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa/dpaa_qdma.c | 41 +++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index 383142fc75..e3f2777b40 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -12,6 +12,8 @@
 static int s_data_validation;
 static int s_hw_err_check;
 static int s_sg_enable = 1;
+static uint32_t s_sg_max_entry_sz = 2000;
+
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050757
 static int s_pci_read = 1;
 #endif
@@ -761,7 +763,7 @@ fsl_qdma_enqueue_desc_sg(struct fsl_qdma_queue *fsl_queue)
 	struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest;
 	struct fsl_qdma_cmpd_ft *ft;
 	uint32_t total_len;
-	uint16_t start, idx, num, i;
+	uint16_t start, idx, num, i, next_idx;
 	int ret;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050757
 	struct fsl_qdma_sdf *sdf;
@@ -770,13 +772,31 @@ fsl_qdma_enqueue_desc_sg(struct fsl_qdma_queue *fsl_queue)
 eq_sg:
 	total_len = 0;
 	start = fsl_queue->pending_start;
+	if (fsl_queue->pending_desc[start].len > s_sg_max_entry_sz ||
+		fsl_queue->pending_num == 1) {
+		ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+			fsl_queue->pending_desc[start].dst,
+			fsl_queue->pending_desc[start].src,
+			fsl_queue->pending_desc[start].len);
+		if (!ret) {
+			fsl_queue->pending_start =
+				(start + 1) & (fsl_queue->pending_max - 1);
+			fsl_queue->pending_num--;
+		}
+		if (fsl_queue->pending_num > 0)
+			goto eq_sg;
+
+		return ret;
+	}
+
+	ret = fsl_qdma_enqueue_overflow(fsl_queue);
+	if (unlikely(ret))
+		return ret;
+
 	if (fsl_queue->pending_num > FSL_QDMA_SG_MAX_ENTRY)
 		num = FSL_QDMA_SG_MAX_ENTRY;
 	else
 		num = fsl_queue->pending_num;
-	ret = fsl_qdma_enqueue_overflow(fsl_queue);
-	if (unlikely(ret))
-		return ret;
 
 	ft = fsl_queue->ft[fsl_queue->ci];
 	csgf_src = &ft->desc_sbuf;
@@ -799,7 +819,16 @@ fsl_qdma_enqueue_desc_sg(struct fsl_qdma_queue *fsl_queue)
 		ft->desc_dsge[i].length = fsl_queue->pending_desc[idx].len;
 		ft->desc_dsge[i].final = 0;
 		total_len += fsl_queue->pending_desc[idx].len;
+		if ((i + 1) != num) {
+			next_idx = (idx + 1) & (fsl_queue->pending_max - 1);
+			if (fsl_queue->pending_desc[next_idx].len >
+				s_sg_max_entry_sz) {
+				num = i + 1;
+				break;
+			}
+		}
 	}
+	ft->desc_ssge[num - 1].final = 1;
 	ft->desc_dsge[num - 1].final = 1;
 	csgf_src->length = total_len;
@@ -1297,6 +1326,10 @@ dpaa_qdma_init(struct rte_dma_dev *dmadev)
 	if (penv)
 		s_sg_enable = atoi(penv);
 
+	penv = getenv("DPAA_QDMA_SG_MAX_ENTRY_SIZE");
+	if (penv)
+		s_sg_max_entry_sz = atoi(penv);
+
#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
 	penv = getenv("DPAA_QDMA_PCI_READ");
 	if (penv)
-- 
2.25.1
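Putting the two tunables together, the enqueue path now picks per descriptor between a single transaction and SG batching; a minimal sketch of the decision (names taken from the diff, logic condensed for illustration):

    /* Sketch: entries larger than s_sg_max_entry_sz (default 2000B,
     * overridable via DPAA_QDMA_SG_MAX_ENTRY_SIZE) bypass the SG path.
     */
    static int
    pick_copy_mode_sketch(struct fsl_qdma_queue *q)
    {
        struct fsl_qdma_desc *d = &q->pending_desc[q->pending_start];

        if (d->len > s_sg_max_entry_sz || q->pending_num == 1)
            return fsl_qdma_enqueue_desc_single(q, d->dst, d->src, d->len);

        /* batches entries until the next oversize one is hit */
        return fsl_qdma_enqueue_desc_sg(q);
    }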
[v2 30/30] bus/dpaa: add port bmi stats
From: Hemant Agrawal Add BMI statistics and fixing the existing extended statistics Signed-off-by: Hemant Agrawal Signed-off-by: Gagandeep Singh --- drivers/bus/dpaa/base/fman/fman_hw.c | 65 +++- drivers/bus/dpaa/include/fman.h | 4 +- drivers/bus/dpaa/include/fsl_fman.h | 12 + drivers/bus/dpaa/version.map | 4 ++ drivers/net/dpaa/dpaa_ethdev.c | 46 +--- drivers/net/dpaa/dpaa_ethdev.h | 12 + 6 files changed, 134 insertions(+), 9 deletions(-) diff --git a/drivers/bus/dpaa/base/fman/fman_hw.c b/drivers/bus/dpaa/base/fman/fman_hw.c index 24a99f7235..27b39a4975 100644 --- a/drivers/bus/dpaa/base/fman/fman_hw.c +++ b/drivers/bus/dpaa/base/fman/fman_hw.c @@ -244,8 +244,8 @@ fman_if_stats_get_all(struct fman_if *p, uint64_t *value, int n) uint64_t base_offset = offsetof(struct memac_regs, reoct_l); for (i = 0; i < n; i++) - value[i] = (((u64)in_be32((char *)regs + base_offset + 8 * i) | - (u64)in_be32((char *)regs + base_offset + + value[i] = ((u64)in_be32((char *)regs + base_offset + 8 * i) | + ((u64)in_be32((char *)regs + base_offset + 8 * i + 4)) << 32); } @@ -266,6 +266,67 @@ fman_if_stats_reset(struct fman_if *p) ; } +void +fman_if_bmi_stats_enable(struct fman_if *p) +{ + struct __fman_if *m = container_of(p, struct __fman_if, __if); + struct rx_bmi_regs *regs = (struct rx_bmi_regs *)m->bmi_map; + uint32_t tmp; + + tmp = in_be32(®s->fmbm_rstc); + + tmp |= FMAN_BMI_COUNTERS_EN; + + out_be32(®s->fmbm_rstc, tmp); +} + +void +fman_if_bmi_stats_disable(struct fman_if *p) +{ + struct __fman_if *m = container_of(p, struct __fman_if, __if); + struct rx_bmi_regs *regs = (struct rx_bmi_regs *)m->bmi_map; + uint32_t tmp; + + tmp = in_be32(®s->fmbm_rstc); + + tmp &= ~FMAN_BMI_COUNTERS_EN; + + out_be32(®s->fmbm_rstc, tmp); +} + +void +fman_if_bmi_stats_get_all(struct fman_if *p, uint64_t *value) +{ + struct __fman_if *m = container_of(p, struct __fman_if, __if); + struct rx_bmi_regs *regs = (struct rx_bmi_regs *)m->bmi_map; + int i = 0; + + value[i++] = (u32)in_be32(®s->fmbm_rfrc); + value[i++] = (u32)in_be32(®s->fmbm_rfbc); + value[i++] = (u32)in_be32(®s->fmbm_rlfc); + value[i++] = (u32)in_be32(®s->fmbm_rffc); + value[i++] = (u32)in_be32(®s->fmbm_rfdc); + value[i++] = (u32)in_be32(®s->fmbm_rfldec); + value[i++] = (u32)in_be32(®s->fmbm_rodc); + value[i++] = (u32)in_be32(®s->fmbm_rbdc); +} + +void +fman_if_bmi_stats_reset(struct fman_if *p) +{ + struct __fman_if *m = container_of(p, struct __fman_if, __if); + struct rx_bmi_regs *regs = (struct rx_bmi_regs *)m->bmi_map; + + out_be32(®s->fmbm_rfrc, 0); + out_be32(®s->fmbm_rfbc, 0); + out_be32(®s->fmbm_rlfc, 0); + out_be32(®s->fmbm_rffc, 0); + out_be32(®s->fmbm_rfdc, 0); + out_be32(®s->fmbm_rfldec, 0); + out_be32(®s->fmbm_rodc, 0); + out_be32(®s->fmbm_rbdc, 0); +} + void fman_if_promiscuous_enable(struct fman_if *p) { diff --git a/drivers/bus/dpaa/include/fman.h b/drivers/bus/dpaa/include/fman.h index 3a6dd555a7..60681068ea 100644 --- a/drivers/bus/dpaa/include/fman.h +++ b/drivers/bus/dpaa/include/fman.h @@ -56,6 +56,8 @@ #define FMAN_PORT_BMI_FIFO_UNITS 0x100 #define FMAN_PORT_IC_OFFSET_UNITS 0x10 +#define FMAN_BMI_COUNTERS_EN 0x8000 + #define FMAN_ENABLE_BPOOL_DEPLETION0xF0F0 #define HASH_CTRL_MCAST_EN 0x0100 @@ -260,7 +262,7 @@ struct rx_bmi_regs { /**< Buffer Manager pool Information-*/ uint32_t fmbm_acnt[FMAN_PORT_MAX_EXT_POOLS_NUM]; /**< Allocate Counter-*/ - uint32_t reserved0130[8]; + uint32_t reserved0120[16]; /**< 0x130/0x140 - 0x15F reserved -*/ uint32_t fmbm_rcgm[FMAN_PORT_CG_MAP_NUM]; /**< Congestion Group Map*/ diff --git 
a/drivers/bus/dpaa/include/fsl_fman.h b/drivers/bus/dpaa/include/fsl_fman.h index 20690f8329..5a9750ad0c 100644 --- a/drivers/bus/dpaa/include/fsl_fman.h +++ b/drivers/bus/dpaa/include/fsl_fman.h @@ -60,6 +60,18 @@ void fman_if_stats_reset(struct fman_if *p); __rte_internal void fman_if_stats_get_all(struct fman_if *p, uint64_t *value, int n); +__rte_internal +void fman_if_bmi_stats_enable(struct fman_if *p); + +__rte_internal +void fman_if_bmi_stats_disable(struct fman_if *p); + +__rte_internal +void fman_if_bmi_stats_get_all(struct fman_if *p, uint64_t *value); + +__rte_internal +void fman_if_bmi_stats_reset(struct fman_if *p); + /* Set ignore pause option for a specific interface */ void fman_if_set_rx_ignore_pause_frames(struct fman_if *p, bool enable); diff --git a
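The four new internal APIs follow the usual fman_if pattern; a sketch of how a PMD's xstats path might consume them (the counter order mirrors the get_all() implementation above, rfrc, rfbc, rlfc, rffc, rfdc, rfldec, rodc, rbdc, and the 8-entry array size is an assumption derived from that function):

    #define DPAA_BMI_XSTATS_NUM 8 /* assumed: one per fmbm_* counter read above */

    static void
    bmi_stats_sketch(struct fman_if *fif)
    {
        uint64_t cnt[DPAA_BMI_XSTATS_NUM];

        fman_if_bmi_stats_enable(fif);   /* sets FMAN_BMI_COUNTERS_EN in fmbm_rstc */
        /* ... traffic runs ... */
        fman_if_bmi_stats_get_all(fif, cnt);
        fman_if_bmi_stats_reset(fif);    /* clears counters via out_be32(.., 0) */
    }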
[v2 02/30] dma/dpaa2: support multiple HW queues
From: Jun Yang Initialize and Configure queues of dma device according to hw queues supported from mc bus. Because multiple queues per device are supported, virt queues implementation are dropped. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 312 +++-- drivers/dma/dpaa2/dpaa2_qdma.h | 6 +- 2 files changed, 140 insertions(+), 178 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 5954b552b5..945ba71e4a 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -478,9 +478,9 @@ dpdmai_dev_get_job_us(struct qdma_virt_queue *qdma_vq __rte_unused, static inline uint16_t dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq, -const struct qbman_fd *fd, -struct rte_dpaa2_qdma_job **job, -uint16_t *nb_jobs) + const struct qbman_fd *fd, + struct rte_dpaa2_qdma_job **job, + uint16_t *nb_jobs) { struct qbman_fle *fle; struct rte_dpaa2_qdma_job **ppjob = NULL; @@ -512,9 +512,9 @@ dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq, static inline uint16_t dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq, -const struct qbman_fd *fd, -struct rte_dpaa2_qdma_job **job, -uint16_t *nb_jobs) + const struct qbman_fd *fd, + struct rte_dpaa2_qdma_job **job, + uint16_t *nb_jobs) { struct qbman_fle *fle; struct rte_dpaa2_qdma_job **ppjob = NULL; @@ -548,12 +548,12 @@ dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq, /* Function to receive a QDMA job for a given device and queue*/ static int dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, -uint16_t *vq_id, -struct rte_dpaa2_qdma_job **job, -uint16_t nb_jobs) + uint16_t *vq_id, + struct rte_dpaa2_qdma_job **job, + uint16_t nb_jobs) { struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev; - struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]); + struct dpaa2_queue *rxq; struct qbman_result *dq_storage, *dq_storage1 = NULL; struct qbman_pull_desc pulldesc; struct qbman_swp *swp; @@ -562,7 +562,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, uint8_t num_rx = 0; const struct qbman_fd *fd; uint16_t vqid, num_rx_ret; - uint16_t rx_fqid = rxq->fqid; + uint16_t rx_fqid; int ret, pull_size; if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) { @@ -575,15 +575,17 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, if (unlikely(!DPAA2_PER_LCORE_DPIO)) { ret = dpaa2_affine_qbman_swp(); if (ret) { - DPAA2_QDMA_ERR( - "Failed to allocate IO portal, tid: %d\n", + DPAA2_QDMA_ERR("Failed to allocate IO portal, tid(%d)", rte_gettid()); return 0; } } swp = DPAA2_PER_LCORE_PORTAL; + rxq = &dpdmai_dev->rx_queue[qdma_vq->vq_id]; + rx_fqid = rxq->fqid; - pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs; + pull_size = (nb_jobs > dpaa2_dqrr_size) ? 
+ dpaa2_dqrr_size : nb_jobs; q_storage = rxq->q_storage; if (unlikely(!q_storage->active_dqs)) { @@ -697,12 +699,12 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, static int dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, - uint16_t *vq_id, - struct rte_dpaa2_qdma_job **job, - uint16_t nb_jobs) + uint16_t *vq_id, + struct rte_dpaa2_qdma_job **job, + uint16_t nb_jobs) { struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev; - struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]); + struct dpaa2_queue *rxq; struct qbman_result *dq_storage; struct qbman_pull_desc pulldesc; struct qbman_swp *swp; @@ -710,7 +712,7 @@ dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, uint8_t num_rx = 0; const struct qbman_fd *fd; uint16_t vqid, num_rx_ret; - uint16_t rx_fqid = rxq->fqid; + uint16_t rx_fqid; int ret, next_pull, num_pulled = 0; if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) { @@ -725,15 +727,15 @@ dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, if (unlikely(!DPAA2_PER_LCORE_DPIO)) { ret = dpaa2_affine_qbman_swp(); if (ret) { -
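The key functional change in both dequeue variants above is the same two-line swap: the RX queue is no longer hard-wired to index 0 but chosen by the virtual queue, which is what actually enables multiple HW queues per DMA device. In isolation (names from the diff):

    /* Before: every virtual queue drained rx_queue[0]. */
    rxq = &dpdmai_dev->rx_queue[0];

    /* After: each virtual queue owns the HW queue it was bound to. */
    rxq = &dpdmai_dev->rx_queue[qdma_vq->vq_id];
    rx_fqid = rxq->fqid;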
[v2 06/30] dma/dpaa2: include DPAA2 specific header files
From: Jun Yang

Include dpaa2_hw_pvt.h and dpaa2_hw_dpio.h files

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa2/dpaa2_qdma.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index 743a43fa14..eb02bff08f 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -5,6 +5,9 @@
 #ifndef _DPAA2_QDMA_H_
 #define _DPAA2_QDMA_H_
 
+#include "portal/dpaa2_hw_pvt.h"
+#include "portal/dpaa2_hw_dpio.h"
+
 #define DPAA2_QDMA_MAX_DESC	4096
 #define DPAA2_QDMA_MIN_DESC	1
 #define DPAA2_QDMA_MAX_VHANS	64
-- 
2.25.1
[v2 07/30] dma/dpaa2: borrow flags of DMA operation to pass job context
From: Jun Yang For copy_sg: pass job index lists. For copy: pass job index. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 92 ++ drivers/dma/dpaa2/dpaa2_qdma.h | 7 ++ drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h | 15 - 3 files changed, 68 insertions(+), 46 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 7f6ebcb46b..7de4894b35 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -280,25 +280,22 @@ sg_entry_post_populate(const struct rte_dma_sge *src, const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx, uint16_t nb_sge) { - uint16_t i = 0, idx; - uint32_t total_len = 0, len; + uint16_t i; + uint32_t total_len = 0; struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry; struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry; for (i = 0; i < (nb_sge - 1); i++) { if (unlikely(src[i].length != dst[i].length)) return -ENOTSUP; - len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length); - idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length); src_sge->addr_lo = (uint32_t)src[i].addr; src_sge->addr_hi = (src[i].addr >> 32); - src_sge->data_len.data_len_sl0 = len; + src_sge->data_len.data_len_sl0 = src[i].length; dst_sge->addr_lo = (uint32_t)dst[i].addr; dst_sge->addr_hi = (dst[i].addr >> 32); - dst_sge->data_len.data_len_sl0 = len; - total_len += len; - sg_cntx->cntx_idx[i] = idx; + dst_sge->data_len.data_len_sl0 = dst[i].length; + total_len += dst[i].length; src_sge->ctrl.f = 0; dst_sge->ctrl.f = 0; @@ -309,19 +306,15 @@ sg_entry_post_populate(const struct rte_dma_sge *src, if (unlikely(src[i].length != dst[i].length)) return -ENOTSUP; - len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length); - idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length); - src_sge->addr_lo = (uint32_t)src[i].addr; src_sge->addr_hi = (src[i].addr >> 32); - src_sge->data_len.data_len_sl0 = len; + src_sge->data_len.data_len_sl0 = src[i].length; dst_sge->addr_lo = (uint32_t)dst[i].addr; dst_sge->addr_hi = (dst[i].addr >> 32); - dst_sge->data_len.data_len_sl0 = len; + dst_sge->data_len.data_len_sl0 = dst[i].length; - total_len += len; - sg_cntx->cntx_idx[i] = idx; + total_len += dst[i].length; sg_cntx->job_nb = nb_sge; src_sge->ctrl.f = QDMA_SG_F; @@ -343,20 +336,18 @@ sg_entry_populate(const struct rte_dma_sge *src, const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx, uint16_t nb_sge) { - uint16_t i, idx; - uint32_t total_len = 0, len; + uint16_t i; + uint32_t total_len = 0; struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry; struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry; for (i = 0; i < nb_sge; i++) { if (unlikely(src[i].length != dst[i].length)) return -ENOTSUP; - len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length); - idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length); src_sge->addr_lo = (uint32_t)src[i].addr; src_sge->addr_hi = (src[i].addr >> 32); - src_sge->data_len.data_len_sl0 = len; + src_sge->data_len.data_len_sl0 = src[i].length; src_sge->ctrl.sl = QDMA_SG_SL_LONG; src_sge->ctrl.fmt = QDMA_SG_FMT_SDB; #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA @@ -366,7 +357,7 @@ sg_entry_populate(const struct rte_dma_sge *src, #endif dst_sge->addr_lo = (uint32_t)dst[i].addr; dst_sge->addr_hi = (dst[i].addr >> 32); - dst_sge->data_len.data_len_sl0 = len; + dst_sge->data_len.data_len_sl0 = dst[i].length; dst_sge->ctrl.sl = QDMA_SG_SL_LONG; dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB; #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA @@ -374,8 +365,7 @@ sg_entry_populate(const struct rte_dma_sge *src, #else dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE; #endif - 
total_len += len; - sg_cntx->cntx_idx[i] = idx; + total_len += src[i].length; if (i == (nb_sge - 1)) { src_sge->ctrl.f = QDMA_SG_F; @@ -606,14 +596,15 @@ dpaa2_qdma_copy_sg(void *dev_private, struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private; struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev; struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan]; - int ret = 0, expected; - uint32_t cntx_idx, len; + int ret = 0, expected, i; + uint32_t len;
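Since rte_dma_copy()'s last argument is a uint64_t flags word, the driver can carry the caller's job index in it instead of packing the index into the length field as the removed RTE_DPAA2_QDMA_IDX_FROM_LENGTH() code did. A sketch of what a caller might look like under this scheme (the index encoding below is a hypothetical illustration; only the "borrow the flags" idea comes from the patch):

    #include <rte_dmadev.h>

    /* Hypothetical encoding: job index in the upper bits of the op flags. */
    #define QDMA_IDX_SHIFT 16
    #define QDMA_IDX_FLAG(idx) (((uint64_t)(idx)) << QDMA_IDX_SHIFT)

    static int
    copy_with_ctx_sketch(int16_t dev_id, uint16_t vchan,
        rte_iova_t src, rte_iova_t dst, uint32_t len, uint16_t job_idx)
    {
        /* length stays a plain byte count again; context rides in flags */
        return rte_dma_copy(dev_id, vchan, src, dst, len,
            RTE_DMA_OP_FLAG_SUBMIT | QDMA_IDX_FLAG(job_idx));
    }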
[v2 00/30] NXP DMA driver fixes and Enhancements
V2 changes: * fix compilation issue on ubuntu 22.04 Gagandeep Singh (6): dma/dpaa: support multi channels dma/dpaa: fix job enqueue dma/dpaa: add workaround for ERR050757 dma/dpaa: qdma stall workaround for ERR050265 dma/dpaa: remove unwanted desc dma/dpaa: data path optimization Hemant Agrawal (1): bus/dpaa: add port bmi stats Jun Yang (22): dma/dpaa2: configure route by port by PCIe port param dma/dpaa2: support multiple HW queues dma/dpaa2: adapt DMA driver API dma/dpaa2: multiple process support dma/dpaa2: add sanity check for SG entry dma/dpaa2: include DPAA2 specific header files dma/dpaa2: borrow flags of DMA operation to pass job context bus/fslmc: enhance the qbman dq storage logic dma/dpaa2: add short FD support dma/dpaa2: limit the max descriptor number dma/dpaa2: change the DMA copy return value dma/dpaa2: move the qdma header to common place dma/dpaa: refactor driver dma/dpaa: dequeue status queue dma/dpaa: add Scatter Gather support dma/dpaa: block dequeue dma/dpaa: improve congestion handling dma/dpaa: disable SG descriptor as default dma/dpaa: improve ERRATA workaround solution dma/dpaa: improve silent mode support dma/dpaa: support multiple SG copies dma/dpaa: support max SG entry size Vanshika Shukla (1): dma/dpaa: add burst capacity API config/arm/meson.build|4 +- doc/api/doxy-api-index.md |2 +- doc/api/doxy-api.conf.in |2 +- doc/guides/dmadevs/dpaa.rst |3 + drivers/bus/dpaa/base/fman/fman_hw.c | 65 +- drivers/bus/dpaa/include/fman.h |4 +- drivers/bus/dpaa/include/fsl_fman.h | 12 + drivers/bus/dpaa/version.map |4 + drivers/bus/fslmc/portal/dpaa2_hw_dpci.c | 25 +- drivers/bus/fslmc/portal/dpaa2_hw_dpio.c |7 +- drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 38 +- .../bus/fslmc/qbman/include/fsl_qbman_base.h | 29 +- drivers/common/dpaax/meson.build |3 +- drivers/common/dpaax/rte_pmd_dpaax_qdma.h | 23 + drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 23 +- drivers/crypto/dpaa2_sec/dpaa2_sec_raw_dp.c |4 +- drivers/dma/dpaa/dpaa_qdma.c | 1641 +++- drivers/dma/dpaa/dpaa_qdma.h | 289 +- drivers/dma/dpaa2/dpaa2_qdma.c| 2381 + drivers/dma/dpaa2/dpaa2_qdma.h| 243 +- drivers/dma/dpaa2/meson.build |4 +- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h| 177 -- drivers/dma/dpaa2/version.map | 14 - drivers/net/dpaa/dpaa_ethdev.c| 46 +- drivers/net/dpaa/dpaa_ethdev.h| 12 + drivers/net/dpaa2/dpaa2_ethdev.c | 81 +- drivers/net/dpaa2/dpaa2_rxtx.c| 19 +- drivers/raw/dpaa2_cmdif/dpaa2_cmdif.c |4 +- 28 files changed, 2856 insertions(+), 2303 deletions(-) create mode 100644 drivers/common/dpaax/rte_pmd_dpaax_qdma.h delete mode 100644 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h delete mode 100644 drivers/dma/dpaa2/version.map -- 2.25.1
[v2 18/30] dma/dpaa: remove unwanted desc
remove unwanted descriptor list maintenance and channels overhead. Signed-off-by: Gagandeep Singh --- drivers/dma/dpaa/dpaa_qdma.c | 594 +-- drivers/dma/dpaa/dpaa_qdma.h | 43 +-- 2 files changed, 221 insertions(+), 416 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index f1878879af..8e8426b88d 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -111,96 +111,6 @@ static void return virt_addr; } -static void -dma_pool_free(void *addr) -{ - rte_free(addr); -} - -static void -fsl_qdma_free_chan_resources(struct fsl_qdma_chan *fsl_chan) -{ - struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; - struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma; - struct fsl_qdma_comp *comp_temp, *_comp_temp; - int id; - - if (--fsl_queue->count) - goto finally; - - id = (fsl_qdma->block_base - fsl_queue->block_base) / - fsl_qdma->block_offset; - - while (rte_atomic32_read(&wait_task[id]) == 1) - rte_delay_us(QDMA_DELAY); - - list_for_each_entry_safe(comp_temp, _comp_temp, -&fsl_queue->comp_used, list) { - list_del(&comp_temp->list); - dma_pool_free(comp_temp->virt_addr); - dma_pool_free(comp_temp->desc_virt_addr); - rte_free(comp_temp); - } - - list_for_each_entry_safe(comp_temp, _comp_temp, -&fsl_queue->comp_free, list) { - list_del(&comp_temp->list); - dma_pool_free(comp_temp->virt_addr); - dma_pool_free(comp_temp->desc_virt_addr); - rte_free(comp_temp); - } - -finally: - fsl_qdma->desc_allocated--; -} - -static void -fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, - dma_addr_t dst, dma_addr_t src, u32 len) -{ - struct fsl_qdma_format *csgf_src, *csgf_dest; -#ifdef RTE_DMA_DPAA_ERRATA_ERR050757 - struct fsl_qdma_sdf *sdf; - u32 cfg = 0; -#endif - - /* Note: command table (fsl_comp->virt_addr) is getting filled -* directly in cmd descriptors of queues while enqueuing the descriptor -* please refer fsl_qdma_enqueue_desc -* frame list table (virt_addr) + 1) and source, -* destination descriptor table -* (fsl_comp->desc_virt_addr and fsl_comp->desc_virt_addr+1) move to -* the control path to fsl_qdma_pre_request_enqueue_comp_sd_desc -*/ - csgf_src = (struct fsl_qdma_format *)fsl_comp->virt_addr + 2; - csgf_dest = (struct fsl_qdma_format *)fsl_comp->virt_addr + 3; - -#ifdef RTE_DMA_DPAA_ERRATA_ERR050757 - sdf = (struct fsl_qdma_sdf *)fsl_comp->desc_virt_addr; - sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); -#ifdef RTE_DMA_DPAA_ERRATA_ERR050265 - sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_PF); -#endif - if (len > FSL_QDMA_CMD_SSS_DISTANCE) { - sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSEN); - cfg |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSS_STRIDE << - FSL_QDMA_CFG_SSS_OFFSET | - FSL_QDMA_CMD_SSS_DISTANCE); - sdf->cfg = cfg; - } else - sdf->cfg = 0; -#endif - - /* Status notification is enqueued to status queue. */ - qdma_desc_addr_set64(csgf_src, src); - qdma_csgf_set_len(csgf_src, len); - qdma_desc_addr_set64(csgf_dest, dst); - qdma_csgf_set_len(csgf_dest, len); - /* This entry is the last entry. */ - qdma_csgf_set_f(csgf_dest, len); -} - /* * Pre-request command descriptor and compound S/G for enqueue. 
*/ @@ -209,42 +119,41 @@ fsl_qdma_pre_request_enqueue_comp_sd_desc( struct fsl_qdma_queue *queue, int size, int aligned) { - struct fsl_qdma_comp *comp_temp, *_comp_temp; struct fsl_qdma_sdf *sdf; struct fsl_qdma_ddf *ddf; struct fsl_qdma_format *csgf_desc; - int i; - - for (i = 0; i < (int)(queue->n_cq + COMMAND_QUEUE_OVERFLOW); i++) { - comp_temp = rte_zmalloc("qdma: comp temp", - sizeof(*comp_temp), 0); - if (!comp_temp) - return -ENOMEM; - - comp_temp->virt_addr = - dma_pool_alloc(size, aligned, &comp_temp->bus_addr); - if (!comp_temp->virt_addr) { - rte_free(comp_temp); + struct fsl_qdma_format *ccdf; + int i, j; + struct fsl_qdma_format *head; + + head = queue->virt_head; + + for (i = 0; i < (int)(queue->n_cq); i++) { + dma_addr_t bus_addr = 0, desc_bus_addr = 0; + + queue->virt_addr[i] = +
[v2 19/30] dma/dpaa: data path optimization
Remove unnessary status read before every send. Signed-off-by: Gagandeep Singh --- drivers/dma/dpaa/dpaa_qdma.c | 186 ++- drivers/dma/dpaa/dpaa_qdma.h | 7 ++ 2 files changed, 101 insertions(+), 92 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 8e8426b88d..4022ad6469 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -248,7 +248,8 @@ fsl_qdma_free_queue_resources(struct fsl_qdma_queue *queue) } static struct -fsl_qdma_queue *fsl_qdma_prep_status_queue(void) +fsl_qdma_queue *fsl_qdma_prep_status_queue(struct fsl_qdma_engine *fsl_qdma, + u32 id) { struct fsl_qdma_queue *status_head; unsigned int status_size; @@ -277,6 +278,8 @@ fsl_qdma_queue *fsl_qdma_prep_status_queue(void) sizeof(struct fsl_qdma_format)); status_head->n_cq = status_size; status_head->virt_head = status_head->cq; + status_head->queue_base = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id); return status_head; } @@ -334,12 +337,9 @@ fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) } static int -fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, -void *block, int id, const uint16_t nb_cpls, +fsl_qdma_queue_transfer_complete(void *block, const uint16_t nb_cpls, enum rte_dma_status_code *status) { - struct fsl_qdma_queue *fsl_status = fsl_qdma->status[id]; - struct fsl_qdma_format *status_addr; u32 reg; int count = 0; @@ -348,16 +348,7 @@ fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, if (reg & FSL_QDMA_BSQSR_QE_BE) return count; - status_addr = fsl_status->virt_head; - - reg = qdma_readl_be(block + FSL_QDMA_BSQMR); - reg |= FSL_QDMA_BSQMR_DI_BE; - - qdma_desc_addr_set64(status_addr, 0x0); - fsl_status->virt_head++; - if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq) - fsl_status->virt_head = fsl_status->cq; - qdma_writel_be(reg, block + FSL_QDMA_BSQMR); + qdma_writel_be(FSL_QDMA_BSQMR_DI, block + FSL_QDMA_BSQMR); if (status != NULL) status[count] = RTE_DMA_STATUS_SUCCESSFUL; @@ -472,19 +463,37 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue, { void *block = fsl_queue->queue_base; struct fsl_qdma_format *csgf_src, *csgf_dest; - u32 reg; #ifdef RTE_DMA_DPAA_ERRATA_ERR050757 struct fsl_qdma_sdf *sdf; u32 cfg = 0; #endif +#ifdef CONFIG_RTE_DMA_DPAA_ERR_CHK + u32 reg; + /* retrieve and store the register value in big endian * to avoid bits swap */ reg = qdma_readl_be(block + FSL_QDMA_BCQSR(fsl_queue->id)); - if (reg & (FSL_QDMA_BCQSR_QF_XOFF_BE)) + if (reg & (FSL_QDMA_BCQSR_QF_XOFF_BE)) { + DPAA_QDMA_ERR("QDMA Engine is busy\n"); return -1; + } +#else + /* check whether critical watermark level reached, +* below check is valid for only single queue per block +*/ + if ((fsl_queue->stats.submitted - fsl_queue->stats.completed) + >= QDMA_QUEUE_CR_WM) { + DPAA_QDMA_DEBUG("Queue is full, try dequeue first\n"); + return -1; + } +#endif + if (unlikely(fsl_queue->pending == fsl_queue->n_cq)) { + DPAA_QDMA_DEBUG("Queue is full, try dma submit first\n"); + return -1; + } csgf_src = (struct fsl_qdma_format *)fsl_queue->virt_addr[fsl_queue->ci] + QDMA_SGF_SRC_OFF; @@ -512,19 +521,14 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue, qdma_csgf_set_len(csgf_dest, len); /* This entry is the last entry. 
*/ qdma_csgf_set_f(csgf_dest, len); - fsl_queue->virt_head++; fsl_queue->ci++; - if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq) { - fsl_queue->virt_head = fsl_queue->cq; + if (fsl_queue->ci == fsl_queue->n_cq) fsl_queue->ci = 0; - } - if (flags & RTE_DMA_OP_FLAG_SUBMIT) { - reg = qdma_readl_be(block + FSL_QDMA_BCQMR(fsl_queue->id)); - reg |= FSL_QDMA_BCQMR_EI_BE; - qdma_writel_be(reg, block + FSL_QDMA_BCQMR(fsl_queue->id)); + qdma_writel_be(FSL_QDMA_BCQMR_EI, + block + FSL_QDMA_BCQMR(fsl_queue->id)); fsl_queue->stats.submitted++; } else { fsl_queue->pending++; @@ -618,12 +622,9 @@ dpaa_qdma_submit(void *dev_private, uint16_t vchan) struct fsl_qdma_queue *fsl_queue = fsl_qdma->
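The replaced per-enqueue register read is the core of this optimization: when error checking is compiled out, queue fullness is derived from the driver's own submitted/completed counters instead of an MMIO read of BCQSR on every send. Factored out as a sketch (taken almost verbatim from the hunk above; as the in-tree comment notes, it is only valid for a single queue per block):

    /* Sketch: software watermark test replacing the BCQSR MMIO read. */
    static inline int
    queue_full_sketch(const struct fsl_qdma_queue *q)
    {
        /* descriptors in HW = submitted - completed (both uint64_t) */
        return (q->stats.submitted - q->stats.completed) >= QDMA_QUEUE_CR_WM;
    }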
[v2 01/30] dma/dpaa2: configure route by port by PCIe port param
From: Jun Yang struct { uint64_t coreid : 4; /**--rbp.sportid / rbp.dportid*/ uint64_t pfid : 8; /**--rbp.spfid / rbp.dpfid*/ uint64_t vfen : 1; /**--rbp.svfa / rbp.dvfa*/ uint64_t vfid : 16; /**--rbp.svfid / rbp.dvfid*/ . } pcie; Signed-off-by: Jun Yang --- .../bus/fslmc/qbman/include/fsl_qbman_base.h | 29 ++--- drivers/dma/dpaa2/dpaa2_qdma.c| 59 +-- drivers/dma/dpaa2/dpaa2_qdma.h| 38 +++- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h| 55 + drivers/dma/dpaa2/version.map | 1 - 5 files changed, 100 insertions(+), 82 deletions(-) diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h index 48ffb1b46e..7528b610e1 100644 --- a/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h +++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 2014 Freescale Semiconductor, Inc. - * Copyright 2017-2019 NXP + * Copyright 2017-2024 NXP * */ #ifndef _FSL_QBMAN_BASE_H @@ -141,12 +141,23 @@ struct qbman_fd { uint32_t saddr_hi; uint32_t len_sl:18; - uint32_t rsv1:14; - + uint32_t rsv13:2; + uint32_t svfid:6; + uint32_t rsv12:2; + uint32_t spfid:2; + uint32_t rsv1:2; uint32_t sportid:4; - uint32_t rsv2:22; + uint32_t rsv2:1; + uint32_t sca:1; + uint32_t sat:2; + uint32_t sattr:3; + uint32_t svfa:1; + uint32_t stc:3; uint32_t bmt:1; - uint32_t rsv3:1; + uint32_t dvfid:6; + uint32_t rsv3:2; + uint32_t dpfid:2; + uint32_t rsv31:2; uint32_t fmt:2; uint32_t sl:1; uint32_t rsv4:1; @@ -154,12 +165,14 @@ struct qbman_fd { uint32_t acc_err:4; uint32_t rsv5:4; uint32_t ser:1; - uint32_t rsv6:3; + uint32_t rsv6:2; + uint32_t wns:1; uint32_t wrttype:4; uint32_t dqos:3; uint32_t drbp:1; uint32_t dlwc:2; - uint32_t rsv7:2; + uint32_t rsv7:1; + uint32_t rns:1; uint32_t rdttype:4; uint32_t sqos:3; uint32_t srbp:1; @@ -182,7 +195,7 @@ struct qbman_fd { uint32_t saddr_lo; uint32_t saddr_hi:17; - uint32_t rsv1:15; + uint32_t rsv1_att:15; uint32_t len; diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 2c91ceec13..5954b552b5 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -22,7 +22,7 @@ uint32_t dpaa2_coherent_alloc_cache; static inline int qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest, uint32_t len, struct qbman_fd *fd, -struct rte_dpaa2_qdma_rbp *rbp, int ser) +struct dpaa2_qdma_rbp *rbp, int ser) { fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src)); fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src)); @@ -93,7 +93,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest, static void dpaa2_qdma_populate_fle(struct qbman_fle *fle, uint64_t fle_iova, - struct rte_dpaa2_qdma_rbp *rbp, + struct dpaa2_qdma_rbp *rbp, uint64_t src, uint64_t dest, size_t len, uint32_t flags, uint32_t fmt) { @@ -114,7 +114,6 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle, /* source */ sdd->read_cmd.portid = rbp->sportid; sdd->rbpcmd_simple.pfid = rbp->spfid; - sdd->rbpcmd_simple.vfa = rbp->vfa; sdd->rbpcmd_simple.vfid = rbp->svfid; if (rbp->srbp) { @@ -127,7 +126,6 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle, /* destination */ sdd->write_cmd.portid = rbp->dportid; sdd->rbpcmd_simple.pfid = rbp->dpfid; - sdd->rbpcmd_simple.vfa = rbp->vfa; sdd->rbpcmd_simple.vfid = rbp->dvfid; if (rbp->drbp) { @@ -178,7 +176,7 @@ dpdmai_dev_set_fd_us(struct qdma_virt_queue *qdma_vq,
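The commit message's struct fragment describes how a single 64-bit device argument packs the PCIe routing fields that feed the rbp descriptors; reconstructed as a self-contained sketch (the field widths and rbp mappings come from the message above, the union wrapper and reserved padding are assumptions):

    #include <stdint.h>

    /* Sketch: one devarg value carries all route-by-port PCIe attributes. */
    union qdma_pcie_port_param {
        uint64_t raw;
        struct {
            uint64_t coreid : 4;  /* -> rbp.sportid / rbp.dportid */
            uint64_t pfid   : 8;  /* -> rbp.spfid   / rbp.dpfid   */
            uint64_t vfen   : 1;  /* -> rbp.svfa    / rbp.dvfa    */
            uint64_t vfid   : 16; /* -> rbp.svfid   / rbp.dvfid   */
            uint64_t rsv    : 35;
        } pcie;
    };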
[v2 20/30] dma/dpaa: refactor driver
From: Jun Yang Return complete index instead of total complete counter in complete callback. Signed-off-by: Jun Yang --- drivers/dma/dpaa/dpaa_qdma.c | 534 ++- drivers/dma/dpaa/dpaa_qdma.h | 109 +++ 2 files changed, 330 insertions(+), 313 deletions(-) diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index 4022ad6469..dc17aa4520 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -15,19 +15,6 @@ qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr) ccdf->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr)); } -static inline u64 -qdma_ccdf_get_queue(const struct fsl_qdma_format *ccdf) -{ - return ccdf->cfg8b_w1 & 0xff; -} - -static inline int -qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf) -{ - return (rte_le_to_cpu_32(ccdf->cfg) & QDMA_CCDF_MASK) - >> QDMA_CCDF_OFFSET; -} - static inline void qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset) { @@ -59,8 +46,7 @@ qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len) csgf->cfg = rte_cpu_to_le_32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK)); } -static inline int -ilog2(int x) +static inline int ilog2(int x) { int log = 0; @@ -73,32 +59,50 @@ ilog2(int x) return log; } -static u32 +static inline int ilog2_qsize(uint32_t q_size) +{ + return (ilog2(q_size) - ilog2(64)); +} + +static inline int ilog2_qthld(uint32_t q_thld) +{ + return (ilog2(q_thld) - ilog2(16)); +} + +static inline int +fsl_qdma_queue_bd_in_hw(struct fsl_qdma_queue *fsl_queue) +{ + struct rte_dma_stats *stats = &fsl_queue->stats; + + return (stats->submitted - stats->completed); +} + +static uint32_t qdma_readl(void *addr) { return QDMA_IN(addr); } static void -qdma_writel(u32 val, void *addr) +qdma_writel(uint32_t val, void *addr) { QDMA_OUT(addr, val); } -static u32 +static uint32_t qdma_readl_be(void *addr) { return QDMA_IN_BE(addr); } static void -qdma_writel_be(u32 val, void *addr) +qdma_writel_be(uint32_t val, void *addr) { QDMA_OUT_BE(addr, val); } -static void -*dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr) +static void * +dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr) { void *virt_addr; @@ -115,47 +119,48 @@ static void * Pre-request command descriptor and compound S/G for enqueue. 
*/ static int -fsl_qdma_pre_request_enqueue_comp_sd_desc( - struct fsl_qdma_queue *queue, - int size, int aligned) +fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue) { struct fsl_qdma_sdf *sdf; struct fsl_qdma_ddf *ddf; - struct fsl_qdma_format *csgf_desc; struct fsl_qdma_format *ccdf; - int i, j; + uint16_t i, j; struct fsl_qdma_format *head; + struct fsl_qdma_cmpd_ft *ft; + struct fsl_qdma_df *df; - head = queue->virt_head; + head = queue->cmd_desc; - for (i = 0; i < (int)(queue->n_cq); i++) { - dma_addr_t bus_addr = 0, desc_bus_addr = 0; + for (i = 0; i < queue->n_cq; i++) { + dma_addr_t phy_ft = 0, phy_df = 0; - queue->virt_addr[i] = - dma_pool_alloc(size, aligned, &bus_addr); - if (!queue->virt_addr[i]) + queue->ft[i] = + dma_pool_alloc(sizeof(struct fsl_qdma_cmpd_ft), + RTE_CACHE_LINE_SIZE, &phy_ft); + if (!queue->ft[i]) goto fail; - queue->desc_virt_addr[i] = - dma_pool_alloc(size, aligned, &desc_bus_addr); - if (!queue->desc_virt_addr[i]) { - rte_free(queue->virt_addr[i]); + queue->df[i] = + dma_pool_alloc(sizeof(struct fsl_qdma_df), + RTE_CACHE_LINE_SIZE, &phy_df); + if (!queue->df[i]) { + rte_free(queue->ft[i]); goto fail; } - memset(queue->virt_addr[i], 0, FSL_QDMA_COMMAND_BUFFER_SIZE); - memset(queue->desc_virt_addr[i], 0, - FSL_QDMA_DESCRIPTOR_BUFFER_SIZE); + memset(queue->ft[i], 0, sizeof(struct fsl_qdma_cmpd_ft)); + memset(queue->df[i], 0, sizeof(struct fsl_qdma_df)); - csgf_desc = (struct fsl_qdma_format *)queue->virt_addr[i] + - QDMA_DESC_OFF; - sdf = (struct fsl_qdma_sdf *)queue->desc_virt_addr[i]; - ddf = (struct fsl_qdma_ddf *)sdf + QDMA_DESC_OFF; + ft = queue->ft[i]; + df = queue->df[i]; + sdf = &df->sdf; + ddf = &df->ddf; /* Compound Command Descriptor(Frame List Table) */ - qdma_desc_addr_set64(csgf_desc, des
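Among the small helpers this refactor adds, ilog2_qsize() and ilog2_qthld() exist because the queue-mode registers take ring geometry as a log2 offset rather than a raw count (the 64- and 16-entry bases are implied by the arithmetic in the diff; the register semantics themselves are an assumption). A worked example as a sketch, building on the driver's existing ilog2():

    /* Sketch: encode ring geometry the way the queue registers expect.
     * e.g. a 4096-entry ring encodes as ilog2(4096) - ilog2(64) = 12 - 6 = 6.
     */
    static inline int ilog2_qsize(uint32_t q_size)
    {
        return ilog2(q_size) - ilog2(64);   /* size in 64-entry units */
    }

    static inline int ilog2_qthld(uint32_t q_thld)
    {
        return ilog2(q_thld) - ilog2(16);   /* threshold in 16-entry units */
    }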
[v2 17/30] dma/dpaa: qdma stall workaround for ERR050265
Non-prefetchable read setting in the source descriptor may be required for targets other than local memory. Prefetchable read setting will offer better performance for misaligned transfers in the form of fewer transactions and should be set if possible. This patch also fixes QDMA stall issue due to unaligned transactions. Signed-off-by: Gagandeep Singh --- config/arm/meson.build | 3 ++- doc/guides/dmadevs/dpaa.rst | 1 + drivers/dma/dpaa/dpaa_qdma.c | 6 ++ drivers/dma/dpaa/dpaa_qdma.h | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/config/arm/meson.build b/config/arm/meson.build index f81e466318..f63ef41130 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -469,7 +469,8 @@ soc_dpaa = { ['RTE_LIBRTE_DPAA2_USE_PHYS_IOVA', false], ['RTE_MAX_LCORE', 16], ['RTE_MAX_NUMA_NODES', 1], - ['RTE_DMA_DPAA_ERRATA_ERR050757', true] + ['RTE_DMA_DPAA_ERRATA_ERR050757', true], + ['RTE_DMA_DPAA_ERRATA_ERR050265', true] ], 'numa': false } diff --git a/doc/guides/dmadevs/dpaa.rst b/doc/guides/dmadevs/dpaa.rst index 746919ec6b..8a7c0befc3 100644 --- a/doc/guides/dmadevs/dpaa.rst +++ b/doc/guides/dmadevs/dpaa.rst @@ -43,6 +43,7 @@ For builds using ``meson`` and ``ninja``, the driver will be built when the target platform is dpaa-based. No additional compilation steps are necessary. - ``RTE_DMA_DPAA_ERRATA_ERR050757`` - enable software workaround for Errata-A050757 +- ``RTE_DMA_DPAA_ERRATA_ERR050265`` - enable software workaround for Errata-A050265 Initialization -- diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c index a21279293c..f1878879af 100644 --- a/drivers/dma/dpaa/dpaa_qdma.c +++ b/drivers/dma/dpaa/dpaa_qdma.c @@ -179,6 +179,9 @@ fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, sdf = (struct fsl_qdma_sdf *)fsl_comp->desc_virt_addr; sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET); +#ifdef RTE_DMA_DPAA_ERRATA_ERR050265 + sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_PF); +#endif if (len > FSL_QDMA_CMD_SSS_DISTANCE) { sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSEN); cfg |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSS_STRIDE << @@ -247,6 +250,9 @@ fsl_qdma_pre_request_enqueue_comp_sd_desc( /* Descriptor Buffer */ sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET); +#ifdef RTE_DMA_DPAA_ERRATA_ERR050265 + sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_PF); +#endif ddf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET); ddf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_LWC << diff --git a/drivers/dma/dpaa/dpaa_qdma.h b/drivers/dma/dpaa/dpaa_qdma.h index 361f88856b..8cb4042bd0 100644 --- a/drivers/dma/dpaa/dpaa_qdma.h +++ b/drivers/dma/dpaa/dpaa_qdma.h @@ -80,6 +80,7 @@ #define FSL_QDMA_CMD_RWTTYPE_OFFSET28 #define FSL_QDMA_CMD_LWC_OFFSET16 +#define FSL_QDMA_CMD_PFBIT(17) #define FSL_QDMA_CMD_SSEN BIT(19) #define FSL_QDMA_CFG_SSS_OFFSET12 -- 2.25.1
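Both the memcpy fill path and the pre-built descriptor path gain the same one-bit change, so the whole workaround reduces to setting the prefetch (PF) command bit in the source descriptor's command word. As a sketch (the FSL_QDMA_CMD_* values are from the diff; the rest of the command-word layout is assumed):

    /* Sketch: ERR050265 workaround - mark source reads prefetchable. */
    static inline uint32_t
    sdf_cmd_sketch(void)
    {
        uint32_t cmd = FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET;

    #ifdef RTE_DMA_DPAA_ERRATA_ERR050265
        cmd |= FSL_QDMA_CMD_PF; /* BIT(17): prefetchable read */
    #endif
        return rte_cpu_to_le_32(cmd);
    }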
RE: [PATCH v2] net/mlx5: replenish MPRQ buffers for miniCQEs
Hi, > -Original Message- > From: Alexander Kozyrev > Sent: Wednesday, November 1, 2023 15:57 > To: dev@dpdk.org > Cc: Suanming Mou ; Slava Ovsiienko > ; Raslan Darawsheh > Subject: [PATCH v2] net/mlx5: replenish MPRQ buffers for miniCQEs > > Keep unzipping if the next CQE is the miniCQE array in > rxq_cq_decompress_v() routine only for non-MPRQ scenario, MPRQ requires > buffer replenishment between the miniCQEs. > > Restore the check for the initial compressed CQE for SPRQ and check that the > current CQE is not compressed before copying it as a possible title CQE. > > Signed-off-by: Alexander Kozyrev Acked-by: Dariusz Sosnowski Best regards, Dariusz Sosnowski
Re: [PATCH v2] net/vdev: fix insert vdev core dump
On 7/16/2024 11:53 AM, Mingjin Ye wrote: In secondary processes, insert_vdev() may be called multiple times on the same device due to multi-process hot-plugging of the vdev bus and EAL parameters to add the same vdev. In this case, when rte_devargs_insert() is called, the devargs->name reference will be invalidated because rte_devargs_insert() destroys the just-allocated devargs and replaces the pointer from the devargs list. As a result, the reference to devargs->name stored in dev->device.name will be invalid. This patch fixes the issue by setting the device name after calling rte_devargs_insert(). Fixes: cdb068f031c6 ("bus/vdev: scan by multi-process channel") Cc: sta...@dpdk.org Signed-off-by: Mingjin Ye --- v2: Modify commit log. --- Forgot to add my review tag: Reviewed-by: Anatoly Burakov -- Thanks, Anatoly
Re: [V1] app/testpmd: restore VXLAN-GPE support
22/07/2024 11:36, Ferruh Yigit: > On 7/22/2024 8:10 AM, Minggang(Gavin) Li wrote: > > From: Ferruh Yigit > >> On 7/17/2024 8:11 AM, Gavin Li wrote: > >>> VXLAN-GPE support was removed from testpmd recently. Drivers which are > >>> not migrated are still using VXLAN-GPE in tests. > >>> > >>> This commit is to restore the support for VXLAN-GPE in testpmd. > >>> > >>> Fixes: da118115d95c ("app/testpmd: support matching any VXLAN field") > >>> Signed-off-by: Gavin Li > >>> > >> > >> Hi Gavin, > >> > >> The original patch was from you, right? What went wrong? > > The remove of VXLAN-GPE from testpmd is too aggressive since there are > > drivers which are not migrated are still using VXLAN-GPE. It's better to > > keep it till the day to remove the RTE item of VXLAN-GPE from DPDK. > > > > Sorry, I was not clear enough maybe, I was asking more details on the > problem? I remember I was suggesting Gavin to remove testpmd code based on the old API. And he realized it was too much agressive later. > With a net/vxlan commit [1] in this release, (this commit is part of > -rc1), VXLAN & VXLAN-GPE headers combined and VXLAN-GBP header added to > this combined struct. VXLAN-GPE header is marked as deprecated. > Testpmd is also updated to use new headers, that is the commit in the > fixes tag of this patch. > > But drivers using old, now depreciated, VXLAN structs won't able to use > testpmd to test, so I agree, may be I merged the patch too early. Yes, so this patch re-add testpmd code for the old API. > As this patch was part of -rc1, I wonder why we didn't get any complaint > about not able to test VXLAN-GPE? Maybe it is tested only with mlx5. > Btw, if we revert this patch, is there a way to test VXLAN-GBP? Because > it only exists as part of new combined VXLAN struct? > Instead of reverting the commit all together, is there way to keep old > capability in testpmd, but add feature to test VXLAN-GBP? It is not a complete revert, we keep GBP in the new API. > And another issue is, there can still some users of the VXLAN-GPE header > in the net library, perhaps that also deprecated immaturely. It is just marked as deprecated. > Can you please send a deprecation note for combining VXLAN headers and > removing VXLAN-GPE in v24.11? Please CC all drivers implementing this > flow pattern. This can be way to highlight the issue to driver > maintainers and communicate the change with end users. 24.11 is probably too early. I propose to keep it as deprecated for compatibility, and maybe remove in one year?
Re: [PATCH] dts: fix DPDK git tarball cast bug
Reviewed-by: Juraj Linkeš Thanks for catching this. On 19. 7. 2024 17:34, Alex Chapman wrote: When running DTS with a git revision specified the following error occurs: $ ./main.py --revision HEAD [...] FileNotFoundError: [Errno 2] No such file or directory: 'dts/' Fix by casting DPDKGitTarball to Path, in order to stringify it correctly. Bugzilla ID: 1496 Fixes: a23f22457dbd ("dts: constrain DPDK source argument") Signed-off-by: Alex Chapman Reviewed-by: Jack Bond-Preston Reviewed-by: Luca Vizzarro Reviewed-by: Paul Szczepanek --- Hello, Sending this fix to a bug that i discovered. Best Regards, Alex --- .mailmap | 1 + dts/framework/settings.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 3f3f0442e5..a326e8e553 100644 --- a/.mailmap +++ b/.mailmap @@ -46,6 +46,7 @@ Aleksey Baulin Aleksey Katargin Ales Musil Alessio Igor Bogani +Alex Chapman Alexander Bechikov Alexander Belyakov Alexander Chernavin diff --git a/dts/framework/settings.py b/dts/framework/settings.py index f95876113f..f6303066d4 100644 --- a/dts/framework/settings.py +++ b/dts/framework/settings.py @@ -415,7 +415,7 @@ def get_settings() -> Settings: args = parser.parse_args() if args.dpdk_revision_id: -args.dpdk_tarball_path = DPDKGitTarball(args.dpdk_revision_id, args.output_dir) +args.dpdk_tarball_path = Path(DPDKGitTarball(args.dpdk_revision_id, args.output_dir)) args.test_suites = _process_test_suites(parser, args.test_suites)
Re: [PATCH v3 3/3] dts: mac filter test suite refactored for new dts
> > +This test suite ensures proper and expected behavior of Allowlist filtering > via mac > +addresses on devices bound to the Poll Mode Driver. If a packet received > on a device > +contains a mac address not contained with its mac address pool, the > packet should > +be dropped. Alternatively, if a packet is received that contains a > destination mac > +within the devices address pool, the packet should be accepted and > forwarded. This > +behavior should remain consistent across all packets, namely those > containing dot1q > +tags or otherwise. > This is pretty minor but you might want to change "with its mac address pool" to "within", just to stay consistent with the wording in the rest of the suite > > +received_packets = [ > +packets > +for packets in self.send_packet_and_capture(packet, > adjust_addresses=False) > +if hasattr(packets, "load") and "X" * 22 in str(packets.load) > +] > Also Jeremy said he mentioned Juraj's opinion on using Jeremy's method of turning off adjust addresses, if you end up doing that just make it depend on this patch: patch-1142113 ("add send_packets to test suites and rework packet addressing") Reviewed-by: Dean Marx On Thu, Jul 18, 2024 at 3:40 PM Nicholas Pratte wrote: > The mac address filter test suite, whose test cases are based on old > DTS's test cases, has been refactored to interface with the new DTS > framework. > > In porting over this test suite into the new framework, some > adjustments were made, namely in the EAL and TestPMD parameter provided > before executing the application. While the original test plan was > referenced, by and large, only for the individual test cases, I'll leave > the parameters the original test plan was asking for below for the sake > of discussion: > > --burst=1 --rxpt=0 --rxht=0 --rxwt=0 --txpt=36 --txht=0 --txwt=0 > --txfreet=32 --rxfreet=64 --mbcache=250 --portmask=0x3 > > Bugzilla ID: 1454 > Signed-off-by: Nicholas Pratte > > --- > v2: > * Refactored the address pool capacity tests to use all available >octets in the mac address. > * Change the payload to 'X' characters instead of 'P' characters. > --- > dts/framework/config/conf_yaml_schema.json | 3 +- > dts/tests/TestSuite_mac_filter.py | 223 + > 2 files changed, 225 insertions(+), 1 deletion(-) > create mode 100644 dts/tests/TestSuite_mac_filter.py > > diff --git a/dts/framework/config/conf_yaml_schema.json > b/dts/framework/config/conf_yaml_schema.json > index f02a310bb5..ad1f3757f7 100644 > --- a/dts/framework/config/conf_yaml_schema.json > +++ b/dts/framework/config/conf_yaml_schema.json > @@ -187,7 +187,8 @@ >"enum": [ > "hello_world", > "os_udp", > -"pmd_buffer_scatter" > +"pmd_buffer_scatter", > +"mac_filter" >] > }, > "test_target": { > diff --git a/dts/tests/TestSuite_mac_filter.py > b/dts/tests/TestSuite_mac_filter.py > new file mode 100644 > index 00..53a3331224 > --- /dev/null > +++ b/dts/tests/TestSuite_mac_filter.py > @@ -0,0 +1,223 @@ > +# SPDX-License-Identifier: BSD-3-Clause > +# Copyright(c) 2023-2024 University of New Hampshire > +"""Mac address filtering test suite. > + > +This test suite ensures proper and expected behavior of Allowlist > filtering via mac > +addresses on devices bound to the Poll Mode Driver. If a packet received > on a device > +contains a mac address not contained with its mac address pool, the > packet should > +be dropped. Alternatively, if a packet is received that contains a > destination mac > +within the devices address pool, the packet should be accepted and > forwarded. 
This > +behavior should remain consistent across all packets, namely those > containing dot1q > +tags or otherwise. > + > +The following test suite assesses behaviors based on the aforementioned > logic. > +Additionally, testing is done within the PMD itself to ensure that the > mac address > +allow list is behaving as expected. > +""" > + > +from time import sleep > + > +from scapy.layers.inet import IP # type: ignore[import-untyped] > +from scapy.layers.l2 import Dot1Q, Ether # type: ignore[import-untyped] > +from scapy.packet import Raw # type: ignore[import-untyped] > + > +from framework.exception import InteractiveCommandExecutionError > +from framework.remote_session.testpmd_shell import TestPmdShell > +from framework.test_suite import TestSuite > + > + > +class TestMacFilter(TestSuite): > +"""Mac address allowlist filtering test suite. > + > +Configure mac address filtering on a given port, and test the port's > filtering behavior > +using both a given port's hardware address as well as dummy > addresses. If a port accepts > +a packet that is not contained within its mac address allowlist, then > a given test case > +fails. Alternatively, if a port drops a packet that is designated > within its mac address > +allowlist, a given test case will fail.
Re: [PATCH v3 2/3] dts: add methods for setting mac and multicast addresses
I think Jeremy already mentioned this but just make sure you update vlan_set_filter_on/off to the new version that has an on boolean arg. Reviewed-by: Dean Marx On Thu, Jul 18, 2024 at 3:12 PM Nicholas Pratte wrote: > Several new methods have been added to TestPMDShell in order to produce > the mac filter's individual test cases: > - set_mac_addr > - set_multicast_mac_addr > - rx_vlan_add > - rx_vlan_rm > - vlan_filter_set_on > - vlan_filter_set_off > - set_promisc > > set_mac_addr and set_multicast_addr were created for the mac filter test > suite, enabling users to both add or remove mac and multicast > addresses based on a boolean 'add or remove' parameter. The success or > failure of each call can be verified if a user deems it necessary. > > The other methods listed are implemented in other respective test > suites, and their implementations have been copied, but are subject to > change; they are not the focus of this patch. > > Bugzilla ID: 1454 > Signed-off-by: Nicholas Pratte > --- > dts/framework/remote_session/testpmd_shell.py | 179 ++ > 1 file changed, 179 insertions(+) > > diff --git a/dts/framework/remote_session/testpmd_shell.py > b/dts/framework/remote_session/testpmd_shell.py > index ec22f72221..8122457ad1 100644 > --- a/dts/framework/remote_session/testpmd_shell.py > +++ b/dts/framework/remote_session/testpmd_shell.py > @@ -767,6 +767,185 @@ def show_port_info(self, port_id: int) -> > TestPmdPort: > > return TestPmdPort.parse(output) > > +def set_mac_addr(self, port_id: int, mac_address: str, add: bool, > verify: bool = True) -> None: > +"""Add or remove a mac address on a given port's Allowlist. > + > +Args: > +port_id: The port ID the mac address is set on. > +mac_address: The mac address to be added or removed to the > specified port. > +add: If :data:`True`, add the specified mac address. If > :data:`False`, remove specified > +mac address. > +verify: If :data:'True', assert that the 'mac_addr' operation > was successful. If > +:data:'False', run the command and skip this assertion. > + > +Raises: > +InteractiveCommandExecutionError: If the set mac address > operation fails. > +""" > +mac_cmd = "add" if add else "remove" > +output = self.send_command(f"mac_addr {mac_cmd} {port_id} > {mac_address}") > +if "Bad arguments" in output: > +self._logger.debug("Invalid argument provided to mac_addr") > +raise InteractiveCommandExecutionError("Invalid argument > provided") > + > +if verify: > +if "mac_addr_cmd error:" in output: > +self._logger.debug(f"Failed to {mac_cmd} {mac_address} on > port {port_id}") > +raise InteractiveCommandExecutionError( > +f"Failed to {mac_cmd} {mac_address} on port {port_id} > \n{output}" > +) > + > +def set_multicast_mac_addr( > +self, port_id: int, multi_addr: str, add: bool, verify: bool = > True > +) -> None: > +"""Add or remove multicast mac address to a specified port's > filter. > + > +Args: > +port_id: The port ID the multicast address is set on. > +multi_addr: The multicast address to be added to the filter. > +add: If :data:'True', add the specified multicast address to > the port filter. > +If :data:'False', remove the specified multicast address > from the port filter. > +verify: If :data:'True', assert that the 'mcast_addr' > operations was successful. > +If :data:'False', execute the 'mcast_addr' operation and > skip the assertion. > + > +Raises: > +InteractiveCommandExecutionError: If either the 'add' or > 'remove' operations fails. 
> +""" > +mcast_cmd = "add" if add else "remove" > +output = self.send_command(f"mcast_addr {mcast_cmd} {port_id} > {multi_addr}") > +if "Bad arguments" in output: > +self._logger.debug("Invalid arguments provided to mcast_addr") > +raise InteractiveCommandExecutionError("Invalid argument > provided") > + > +if verify: > +if ( > +"Invalid multicast_addr" in output > +or f'multicast address {"already" if add else "not"} > filtered by port' in output > +): > +self._logger.debug(f"Failed to {mcast_cmd} {multi_addr} > on port {port_id}") > +raise InteractiveCommandExecutionError( > +f"Failed to {mcast_cmd} {multi_addr} on port > {port_id} \n{output}" > +) > + > +def rx_vlan_add(self, vlan: int, port: int, verify: bool = True) -> > None: > +"""Add specified vlan tag to the filter list on a port. > + > +Args: > +vlan: The vlan tag to add, should be within 1-1005, 1-4094 > extended. >
Re: [PATCH v2 1/3] dts: add boolean to adjust addresses
Reviewed-by: Dean Marx On Tue, Jul 2, 2024 at 3:25 PM Nicholas Pratte wrote: > Various test cases in the mac filter test suite called for granular > manipulation of destination mac addresses to properly test mac address > filtering functionality. To compensate, there is now an > adjust_addresses boolean which the user can toggle if they wish to send > their own addressing; the boolean is true by default. > > Bugzilla ID: 1454 > Signed-off-by: Nicholas Pratte > --- > dts/framework/test_suite.py | 7 ++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/dts/framework/test_suite.py b/dts/framework/test_suite.py > index 694b2eba65..551a587525 100644 > --- a/dts/framework/test_suite.py > +++ b/dts/framework/test_suite.py > @@ -185,6 +185,7 @@ def send_packet_and_capture( > packet: Packet, > filter_config: PacketFilteringConfig = PacketFilteringConfig(), > duration: float = 1, > +adjust_addresses: bool = True, > ) -> list[Packet]: > """Send and receive `packet` using the associated TG. > > @@ -195,11 +196,15 @@ def send_packet_and_capture( > packet: The packet to send. > filter_config: The filter to use when capturing packets. > duration: Capture traffic for this amount of time after > sending `packet`. > +adjust_addresses: If :data:'True', adjust addresses of the > egressing packet with > +a default addressing scheme. If :data:'False', do not > adjust the addresses of > +egressing packet. > > Returns: > A list of received packets. > """ > -packet = self._adjust_addresses(packet) > +if adjust_addresses: > +packet = self._adjust_addresses(packet) > return self.tg_node.send_packet_and_capture( > packet, > self._tg_port_egress, > -- > 2.44.0 > >
Re: [PATCH v2] ethdev: fix device init without socket-local memory
On 7/22/2024 11:02 AM, Bruce Richardson wrote: > When allocating memory for an ethdev, the rte_malloc_socket call used > only allocates memory on the NUMA node/socket local to the device. This > means that even if the user wanted to, they could never use a remote NIC > without also having memory on that NIC's socket. > > For example, if we change examples/skeleton/basicfwd.c to have > SOCKET_ID_ANY as the socket_id parameter for Rx and Tx rings, we should > be able to run the app cross-numa e.g. as below, where the two PCI > devices are on socket 1, and core 1 is on socket 0: > > ./build/examples/dpdk-skeleton -l 1 --legacy-mem --socket-mem=1024,0 \ > -a a8:00.0 -a b8:00.0 > > This fails however, with the error: > > ETHDEV: failed to allocate private data > PCI_BUS: Requested device :a8:00.0 cannot be used > > We can remove this restriction by doing a fallback call to general > rte_malloc after a call to rte_malloc_socket fails. This should be safe > to do because the later ethdev calls to setup Rx/Tx queues all take a > socket_id parameter, which can be used by applications to enforce the > requirement for local-only memory for a device, if so desired. [If > device-local memory is present it will be used as before, while if not > present the rte_eth_dev_configure call will now pass, but the subsequent > queue setup calls requesting local memory will fail]. > > Fixes: e489007a411c ("ethdev: add generic create/destroy ethdev APIs") > Fixes: dcd5c8112bc3 ("ethdev: add PCI driver helpers") > Cc: sta...@dpdk.org > > Signed-off-by: Bruce Richardson > Signed-off-by: Padraig Connolly > Reviewed-by: Ferruh Yigit Applied to dpdk-next-net/main, thanks.
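The fallback pattern described above is easy to show in isolation. A minimal sketch, assuming an illustrative private-data size priv_size and the device's NUMA node socket_id (neither name is taken from the patch):

#include <rte_malloc.h>

/* Try device-local memory first; if the device's socket has no memory,
 * fall back to any socket. Applications that require local-only memory
 * can still enforce it via the per-queue socket_id parameters later. */
static void *
alloc_private_data(size_t priv_size, int socket_id)
{
	void *priv = rte_malloc_socket("ethdev private", priv_size,
			RTE_CACHE_LINE_SIZE, socket_id);

	if (priv == NULL) /* no memory on the device-local socket */
		priv = rte_malloc("ethdev private", priv_size,
				RTE_CACHE_LINE_SIZE);
	return priv;
}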
Re: [PATCH] app/testpmd: fix build on signed comparison
On 7/22/2024 11:52 AM, Ferruh Yigit wrote: > Build error: > .../app/test-pmd/config.c: In function 'icmp_echo_config_setup': > .../app/test-pmd/config.c:5159:30: >error: comparison between signed and unsigned integer expressions > [-Werror=sign-compare] > if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores) > ^ > All 'nb_txq', 'nb_fwd_ports' & 'nb_fwd_lcores' are unsigned variables, > but the warning is related to the integer promotion rules of C: > 'nb_txq' -> uint16_t, promoted to 'int' > 'nb_fwd_ports' -> uint16_t, promoted to 'int' > (nb_txq * nb_fwd_ports) -> result 'int' > nb_fwd_lcores -> 'uint32_t' > Ends up comparing 'int' vs 'uint32_t'. > > Fixing by adding the casting back which was initially part of the patch. > > Fixes: 2bf44dd14fa5 ("app/testpmd: fix lcore ID restriction") > Cc: sta...@dpdk.org > > Reported-by: Raslan Darawsheh > Signed-off-by: Ferruh Yigit > Hi Raslan, Ali, If you can test the patch, as I can't reproduce the build error, I can quickly merge the fix for -rc3. Thanks, ferruh
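The promotion chain above can be reproduced with a self-contained example (values are arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t nb_txq = 2, nb_fwd_ports = 3; /* both promoted to int */
	uint32_t nb_fwd_lcores = 8;

	/* (nb_txq * nb_fwd_ports) has type int, so this compares int
	 * against uint32_t and -Werror=sign-compare rejects the build. */
	if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores)
		printf("mixed signedness comparison\n");

	/* The fix casts one operand so the product stays unsigned. */
	if (((uint32_t)nb_txq * nb_fwd_ports) < nb_fwd_lcores)
		printf("unsigned comparison on both sides\n");
	return 0;
}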
Re: [V1] app/testpmd: restore VXLAN-GPE support
On 7/22/2024 2:04 PM, Thomas Monjalon wrote: > 22/07/2024 11:36, Ferruh Yigit: >> On 7/22/2024 8:10 AM, Minggang(Gavin) Li wrote: >>> From: Ferruh Yigit On 7/17/2024 8:11 AM, Gavin Li wrote: > VXLAN-GPE support was removed from testpmd recently. Drivers which are > not migrated are still using VXLAN-GPE in tests. > > This commit is to restore the support for VXLAN-GPE in testpmd. > > Fixes: da118115d95c ("app/testpmd: support matching any VXLAN field") > Signed-off-by: Gavin Li > Hi Gavin, The original patch was from you, right? What went wrong? >>> The remove of VXLAN-GPE from testpmd is too aggressive since there are >>> drivers which are not migrated are still using VXLAN-GPE. It's better to >>> keep it till the day to remove the RTE item of VXLAN-GPE from DPDK. >>> >> >> Sorry, I was not clear enough maybe, I was asking more details on the >> problem? > > I remember I was suggesting Gavin to remove testpmd code based on the old API. > And he realized it was too much agressive later. > >> With a net/vxlan commit [1] in this release, (this commit is part of >> -rc1), VXLAN & VXLAN-GPE headers combined and VXLAN-GBP header added to >> this combined struct. VXLAN-GPE header is marked as deprecated. >> Testpmd is also updated to use new headers, that is the commit in the >> fixes tag of this patch. >> >> But drivers using old, now depreciated, VXLAN structs won't able to use >> testpmd to test, so I agree, may be I merged the patch too early. > > Yes, so this patch re-add testpmd code for the old API. > >> As this patch was part of -rc1, I wonder why we didn't get any complaint >> about not able to test VXLAN-GPE? > > Maybe it is tested only with mlx5. > >> Btw, if we revert this patch, is there a way to test VXLAN-GBP? Because >> it only exists as part of new combined VXLAN struct? >> Instead of reverting the commit all together, is there way to keep old >> capability in testpmd, but add feature to test VXLAN-GBP? > > It is not a complete revert, we keep GBP in the new API. > So what I was asking done already, let me check the patch again. >> And another issue is, there can still some users of the VXLAN-GPE header >> in the net library, perhaps that also deprecated immaturely. > > It is just marked as deprecated. > >> Can you please send a deprecation note for combining VXLAN headers and >> removing VXLAN-GPE in v24.11? Please CC all drivers implementing this >> flow pattern. This can be way to highlight the issue to driver >> maintainers and communicate the change with end users. > > 24.11 is probably too early. > I propose to keep it as deprecated for compatibility, > and maybe remove in one year? > No strong opinion, both v24.11 or v25.11 is OK. Normally I would go with v25.11, but for VXLAN I expect only small users will be impacted and only a few driver needs update, so perhaps v24.11 also can be a valid target. We can get comment from driver maintainers to the deprecation notice, but not able to get this from potential users.
[PATCH v5 1/3] net/ice: fix possible memory leak
This patch fixes possible memory leak inside the ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() for previously allocated pkt_buf and msk_buf. Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Reported-by: Michael Theodore Stolarchuk Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index f923641533..6b3095e2c5 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -650,7 +650,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, uint8_t *pkt_buf, *msk_buf; uint8_t tmp_val = 0; uint8_t tmp_c = 0; - int i, j; + int i, j, ret = 0; if (ad->psr == NULL) return -rte_errno; @@ -670,8 +670,10 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, return -ENOMEM; msk_buf = rte_zmalloc(NULL, pkt_len, 0); - if (!msk_buf) + if (!msk_buf) { + rte_free(pkt_buf); return -ENOMEM; + } /* convert string to int array */ for (i = 0, j = 0; i < spec_len; i += 2, j++) { @@ -708,18 +710,22 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, msk_buf[j] = tmp_val * 16 + tmp_c - '0'; } - if (ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt)) - return -rte_errno; + ret = ice_parser_run(ad->psr, pkt_buf, pkt_len, &rslt); + if (ret) + goto free_mem; - if (ice_parser_profile_init(&rslt, pkt_buf, msk_buf, - pkt_len, ICE_BLK_RSS, true, &prof)) - return -rte_errno; + ret = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, + pkt_len, ICE_BLK_RSS, true, &prof); + if (ret) + goto free_mem; rte_memcpy(&meta->raw.prof, &prof, sizeof(prof)); +free_mem: rte_free(pkt_buf); rte_free(msk_buf); - return 0; + + return ret; } static void -- 2.34.1
[PATCH v5 2/3] net/ice: refactor raw pattern parsing function
Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index 6b3095e2c5..aa76718313 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -658,10 +658,13 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, raw_spec = item->spec; raw_mask = item->mask; - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); - if (strlen((char *)(uintptr_t)raw_mask->pattern) != - spec_len) - return -rte_errno; + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, + raw_spec->length + 1); + if (spec_len != raw_spec->length) + return -EINVAL; + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length + 1) != + spec_len) + return -EINVAL; pkt_len = spec_len / 2; -- 2.34.1
[PATCH v5 3/3] net/ice: fix return value for raw pattern parsing function
If the parser was not initialized when calling ice_hash_parse_raw_pattern(), -rte_errno was returned. Replace returning rte_errno with ENOTSUP, since rte_errno is meaningless in the context of ice_hash_parse_raw_pattern(). Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS") Cc: sta...@dpdk.org Signed-off-by: Vladimir Medvedkin --- drivers/net/ice/ice_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c index aa76718313..b720e0f755 100644 --- a/drivers/net/ice/ice_hash.c +++ b/drivers/net/ice/ice_hash.c @@ -653,7 +653,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, int i, j, ret = 0; if (ad->psr == NULL) - return -rte_errno; + return -ENOTSUP; raw_spec = item->spec; raw_mask = item->mask; -- 2.34.1
RE: [PATCH v4 2/3] net/ice: refactor raw pattern parsing function
-Original Message- From: Richardson, Bruce Sent: Monday, July 22, 2024 12:25 PM To: Medvedkin, Vladimir Cc: dev@dpdk.org Subject: Re: [PATCH v4 2/3] net/ice: refactor raw pattern parsing function On Mon, Jul 22, 2024 at 10:59:49AM +, Vladimir Medvedkin wrote: > Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. > > Signed-off-by: Vladimir Medvedkin > --- > drivers/net/ice/ice_hash.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c > index 6b3095e2c5..506ea261e8 100644 > --- a/drivers/net/ice/ice_hash.c > +++ b/drivers/net/ice/ice_hash.c > @@ -658,9 +658,9 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, > raw_spec = item->spec; > raw_mask = item->mask; > > - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); > - if (strlen((char *)(uintptr_t)raw_mask->pattern) != > - spec_len) > + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, > raw_spec->length); > + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length) != > + spec_len) Are we missing something by not checking the return values from the length calls for overflow? If spec_len == raw_spec->length, then we have an overflow, and if raw_mask similarly overflows the comparison would still pass and not flag an error. Fixed in v5 /Bruce > return -rte_errno; > > pkt_len = spec_len / 2; > -- > 2.34.1 >
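The reasoning above is why v5 bounds strnlen() by length + 1: scanning one byte past the expected length distinguishes "exactly length characters" from "longer or not NUL-terminated". A standalone sketch of the check (helper name illustrative):

#include <errno.h>
#include <stddef.h>
#include <string.h>

/* Returns 0 only when pattern is NUL-terminated and exactly
 * expected_len characters long. An overlong or unterminated string
 * makes strnlen() return expected_len + 1, failing the equality test
 * instead of silently passing as it could with a bound of expected_len. */
static int
check_pattern_len(const char *pattern, size_t expected_len)
{
	if (strnlen(pattern, expected_len + 1) != expected_len)
		return -EINVAL;
	return 0;
}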
Re: [PATCH] net/gve: Update TX queue state
On 7/22/2024 12:10 PM, Tathagat Priyadarshi wrote: > Fixing Typo in updating the TX queue state > > Signed-off-by: Tathagat Priyadarshi > Good catch, thanks Tathagat. I think impact is more than just typo, so let me update the patch title as: net/gve: fix Tx queue state on queue start Also needs fixes tag: Fixes: b044845bb015 ("net/gve: support queue start/stop") Cc: sta...@dpdk.org With above changes, Reviewed-by: Ferruh Yigit Applied to dpdk-next-net/main, thanks. > --- > drivers/net/gve/gve_tx.c | 2 +- > drivers/net/gve/gve_tx_dqo.c | 2 +- > 2 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/gve/gve_tx.c b/drivers/net/gve/gve_tx.c > index 70d3ef0..500ae31 100644 > --- a/drivers/net/gve/gve_tx.c > +++ b/drivers/net/gve/gve_tx.c > @@ -688,7 +688,7 @@ > > rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); > > - dev->data->rx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; > + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; > > return 0; > } > diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c > index a65e6aa..1b85557 100644 > --- a/drivers/net/gve/gve_tx_dqo.c > +++ b/drivers/net/gve/gve_tx_dqo.c > @@ -392,7 +392,7 @@ > > rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); > > - dev->data->rx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; > + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; > > return 0; > }
Re: [PATCH] net/gve: Update TX queue state
Thanks Ferruh. On Mon, Jul 22, 2024 at 7:41 PM Ferruh Yigit wrote: > On 7/22/2024 12:10 PM, Tathagat Priyadarshi wrote: > > Fixing Typo in updating the TX queue state > > > > Signed-off-by: Tathagat Priyadarshi > > > > Good catch, thanks Tathagat. > > I think impact is more than just typo, so let me update the patch title as: > > net/gve: fix Tx queue state on queue start > > Also needs fixes tag: > Fixes: b044845bb015 ("net/gve: support queue start/stop") > Cc: sta...@dpdk.org > > With above changes, > Reviewed-by: Ferruh Yigit > > Applied to dpdk-next-net/main, thanks. > > > --- > > drivers/net/gve/gve_tx.c | 2 +- > > drivers/net/gve/gve_tx_dqo.c | 2 +- > > 2 files changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/net/gve/gve_tx.c b/drivers/net/gve/gve_tx.c > > index 70d3ef0..500ae31 100644 > > --- a/drivers/net/gve/gve_tx.c > > +++ b/drivers/net/gve/gve_tx.c > > @@ -688,7 +688,7 @@ > > > > rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); > > > > - dev->data->rx_queue_state[tx_queue_id] = > RTE_ETH_QUEUE_STATE_STARTED; > > + dev->data->tx_queue_state[tx_queue_id] = > RTE_ETH_QUEUE_STATE_STARTED; > > > > return 0; > > } > > diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c > > index a65e6aa..1b85557 100644 > > --- a/drivers/net/gve/gve_tx_dqo.c > > +++ b/drivers/net/gve/gve_tx_dqo.c > > @@ -392,7 +392,7 @@ > > > > rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr); > > > > - dev->data->rx_queue_state[tx_queue_id] = > RTE_ETH_QUEUE_STATE_STARTED; > > + dev->data->tx_queue_state[tx_queue_id] = > RTE_ETH_QUEUE_STATE_STARTED; > > > > return 0; > > } > >
Re: [V1] app/testpmd: restore VXLAN-GPE support
On 7/17/2024 8:11 AM, Gavin Li wrote: > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c > index a76b44bf39..51a8d4993e 100644 > --- a/app/test-pmd/cmdline_flow.c > +++ b/app/test-pmd/cmdline_flow.c > @@ -423,6 +423,12 @@ enum index { > ITEM_GENEVE_VNI, > ITEM_GENEVE_PROTO, > ITEM_GENEVE_OPTLEN, > + ITEM_VXLAN_GPE, > + ITEM_VXLAN_GPE_VNI, > + ITEM_VXLAN_GPE_PROTOCOL, > + ITEM_VXLAN_GPE_FLAGS, > + ITEM_VXLAN_GPE_RSVD0, > + ITEM_VXLAN_GPE_RSVD1, > With this addition we have both of following enum items, right? 'ITEM_VXLAN_GPE_PROTOCOL' 'ITEM_VXLAN_GPE_PROTO' 'ITEM_VXLAN_GPE_PROTOCOL' is for the old usage, which was previously 'ITEM_VXLAN_GPE_PROTO'. And 'ITEM_VXLAN_GPE_PROTO' is now for the new usage. This is confusing, and looks like it may live with us for a while if we remove them on v25.11. Does it make sense to keep 'ITEM_VXLAN_GPE_PROTO' as it is, add new one with a name that is more obvious that it is for new VXLAN struct, and some more comment to explain the reasoning of this redundant enum items? As these are testpmd internal, when old VXLAN structs removed, I assume we can easily rename new enum item back to 'ITEM_VXLAN_GPE_PROTO'. What do you think?
[PATCH] doc: announce cryptodev change to support EDDSA
Announce the additions in cryptodev ABI to support EDDSA algorithm. Signed-off-by: Gowrishankar Muthukrishnan -- RFC: https://patches.dpdk.org/project/dpdk/patch/0ae6a1afadac64050d80b0fd7712c4a6a8599e2c.1701273963.git.gmuthukri...@marvell.com/ --- doc/guides/rel_notes/deprecation.rst | 4 1 file changed, 4 insertions(+) diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst index 6948641ff6..fcbec965b1 100644 --- a/doc/guides/rel_notes/deprecation.rst +++ b/doc/guides/rel_notes/deprecation.rst @@ -147,3 +147,7 @@ Deprecation Notices will be deprecated and subsequently removed in DPDK 24.11 release. Before this, the new port library API (functions rte_swx_port_*) will gradually transition from experimental to stable status. + +* cryptodev: The enum ``rte_crypto_asym_xform_type`` and struct ``rte_crypto_asym_op`` + will be extended to include new values to support EDDSA. This will break + ABI compatibility with existing applications that use these data types. -- 2.21.0
Re: [PATCH] lib: add get/set link settings interface
On 4/4/2024 12:38 AM, Stephen Hemminger wrote: > On Thu, 4 Apr 2024 00:08:41 +0200 > Marek Pazdan wrote: > >> I can remove this part (rte_eth_config), but the new API provides other >> link information which is not available from the existing API, like >> supported/advertising/partner_advertising link modes. >> Additionally from what I see, rte_eth_conf is available from >> rte_eth_dev_conf_get API and it's a copy of the requested configuration: >> `memcpy(dev_conf, &dev->data->dev_conf, sizeof(struct rte_eth_conf));` > > > Then the new info should go in the rte_eth_config in a future version > of DPDK. Having bits in different places makes the API confusing. > Hi Marek, I was not able to get enough justification for the new set of ethdev APIs for the link. It can be an option to extend existing APIs, and have a more focused API for the missing part. This depends on what is missing. I am marking this patch and two related patches [1] as change requested; if the functionality is still relevant, please start with a new RFC in the next release with one of the above approaches to enable discussion again. Thanks, ferruh [1] - https://patches.dpdk.org/project/dpdk/patch/20240403135953.7209-1-mpaz...@arista.com/ - https://patches.dpdk.org/project/dpdk/patch/20240403225333.16260-1-mpaz...@arista.com/
[PATCH] doc: announce cryptodev changes to offload RSA in VirtIO
Announce cryptodev changes to offload RSA asymmetric operation in VirtIO PMD. Signed-off-by: Gowrishankar Muthukrishnan -- RFC: https://patches.dpdk.org/project/dpdk/patch/20230928095300.1353-2-gmuthukri...@marvell.com/ https://patches.dpdk.org/project/dpdk/patch/20230928095300.1353-3-gmuthukri...@marvell.com/ --- doc/guides/rel_notes/deprecation.rst | 11 +++ 1 file changed, 11 insertions(+) diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst index 6948641ff6..26fec84aba 100644 --- a/doc/guides/rel_notes/deprecation.rst +++ b/doc/guides/rel_notes/deprecation.rst @@ -147,3 +147,14 @@ Deprecation Notices will be deprecated and subsequently removed in DPDK 24.11 release. Before this, the new port library API (functions rte_swx_port_*) will gradually transition from experimental to stable status. + +* cryptodev: The struct rte_crypto_rsa_padding will be moved from + rte_crypto_rsa_op_param struct to rte_crypto_rsa_xform struct, + breaking ABI. The new location is recommended to comply with + virtio-crypto specification. Applications and drivers using + this struct will be updated. + +* cryptodev: The rte_crypto_rsa_xform struct member to hold private key + in either exponent or quintuple format is changed from union to struct + data type. This change is to support ASN.1 syntax (RFC 3447 Appendix A.1.2). + This change will not break existing applications. -- 2.21.0
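A schematic of the first announced change (struct names and layout are illustrative, not the actual rte_crypto_asym.h definitions): the padding description moves from the per-operation parameters into the transform, so it is fixed at session creation as the virtio-crypto specification expects.

#include <rte_crypto_asym.h>

/* Before (schematic): padding travels with every RSA operation. */
struct rsa_op_param_schematic {
	struct rte_crypto_rsa_padding padding;
	/* ... per-op message/signature buffers ... */
};

/* After (schematic): padding belongs to the session transform. */
struct rsa_xform_schematic {
	/* ... key material (modulus, exponents or quintuple) ... */
	struct rte_crypto_rsa_padding padding; /* moved from the op */
};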
[PATCH] doc: announce vhost changes to support asymmetric operation
Announce vhost ABI changes to modify few functions to support asymmetric crypto operation. Signed-off-by: Gowrishankar Muthukrishnan -- RFC: https://patches.dpdk.org/project/dpdk/patch/20230928095300.1353-4-gmuthukri...@marvell.com/ --- doc/guides/rel_notes/deprecation.rst | 7 +++ 1 file changed, 7 insertions(+) diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst index 6948641ff6..2f5c2c5a34 100644 --- a/doc/guides/rel_notes/deprecation.rst +++ b/doc/guides/rel_notes/deprecation.rst @@ -147,3 +147,10 @@ Deprecation Notices will be deprecated and subsequently removed in DPDK 24.11 release. Before this, the new port library API (functions rte_swx_port_*) will gradually transition from experimental to stable status. + +* vhost: The function ``rte_vhost_crypto_create`` will accept a new parameter + to specify rte_mempool for asymmetric crypto session. The function + ``rte_vhost_crypto_finalize_requests`` will accept two new parameters, + where the first one is to specify vhost device id and other one is to specify + the virtio queue index. These two modifications are required to support + asymmetric crypto operation in vhost crypto and will break ABI. -- 2.21.0
Re: [PATCH v2] net/mlx5: replenish MPRQ buffers for miniCQEs
Hi, From: Alexander Kozyrev Sent: Wednesday, November 1, 2023 4:57 PM To: dev@dpdk.org Cc: Suanming Mou; Slava Ovsiienko; Raslan Darawsheh Subject: [PATCH v2] net/mlx5: replenish MPRQ buffers for miniCQEs Keep unzipping if the next CQE is the miniCQE array in the rxq_cq_decompress_v() routine only for the non-MPRQ scenario; MPRQ requires buffer replenishment between the miniCQEs. Restore the check for the initial compressed CQE for SPRQ and check that the current CQE is not compressed before copying it as a possible title CQE. Signed-off-by: Alexander Kozyrev Patch applied to next-net-mlx, Kindest regards, Raslan Darawsheh
Re: [PATCH v5 1/3] net/ice: fix possible memory leak
On Mon, Jul 22, 2024 at 01:50:44PM +, Vladimir Medvedkin wrote: > This patch fixes possible memory leak inside the > ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() > for previously allocated pkt_buf and msk_buf. > > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in > RSS") > Cc: sta...@dpdk.org > > Reported-by: Michael Theodore Stolarchuk > Signed-off-by: Vladimir Medvedkin Acked-by: Bruce Richardson
Re: [PATCH v5 2/3] net/ice: refactor raw pattern parsing function
On Mon, Jul 22, 2024 at 01:50:45PM +, Vladimir Medvedkin wrote: > Replace strlen with more secure strnlen in ice_hash_parse_raw_pattern. > > Signed-off-by: Vladimir Medvedkin > I believe there are quite a number of other small things in this function that could do with improvement e.g. the processing of the hex strings has no error checks for reporting invalid (i.e. non-hex) characters. However, this patch does improve things a bit by enhancing the length checks, so Acked-by: Bruce Richardson > --- > drivers/net/ice/ice_hash.c | 11 +++ > 1 file changed, 7 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c > index 6b3095e2c5..aa76718313 100644 > --- a/drivers/net/ice/ice_hash.c > +++ b/drivers/net/ice/ice_hash.c > @@ -658,10 +658,13 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, > raw_spec = item->spec; > raw_mask = item->mask; > > - spec_len = strlen((char *)(uintptr_t)raw_spec->pattern); > - if (strlen((char *)(uintptr_t)raw_mask->pattern) != > - spec_len) > - return -rte_errno; > + spec_len = strnlen((char *)(uintptr_t)raw_spec->pattern, > + raw_spec->length + 1); > + if (spec_len != raw_spec->length) > + return -EINVAL; > + if (strnlen((char *)(uintptr_t)raw_mask->pattern, raw_spec->length + 1) > != > + spec_len) > + return -EINVAL; > > pkt_len = spec_len / 2; > > -- > 2.34.1 >
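A sketch of the missing validation Bruce mentions (helper name illustrative): each character is rejected up front if it is not a hex digit, instead of being silently mis-converted.

#include <ctype.h>
#include <errno.h>

/* Convert one hex digit, returning -EINVAL for anything outside
 * [0-9a-fA-F] rather than producing a wrong byte value. */
static int
hex_digit(unsigned char c)
{
	if (isdigit(c))
		return c - '0';
	if (isxdigit(c))
		return tolower(c) - 'a' + 10;
	return -EINVAL;
}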
Re: [PATCH v5 3/3] net/ice: fix return value for raw pattern parsing function
On Mon, Jul 22, 2024 at 01:50:46PM +, Vladimir Medvedkin wrote: > If the parser was not initialized when calling ice_hash_parse_raw_pattern() > -rte_errno was returned. Replace returning rte_errno with ENOTSUP since > rte_errno is meaningless in the context of ice_hash_parse_raw_pattern(). > > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in > RSS") > Cc: sta...@dpdk.org > > Signed-off-by: Vladimir Medvedkin Acked-by: Bruce Richardson > --- > drivers/net/ice/ice_hash.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c > index aa76718313..b720e0f755 100644 > --- a/drivers/net/ice/ice_hash.c > +++ b/drivers/net/ice/ice_hash.c > @@ -653,7 +653,7 @@ ice_hash_parse_raw_pattern(struct ice_adapter *ad, > int i, j, ret = 0; > > if (ad->psr == NULL) > - return -rte_errno; > + return -ENOTSUP; > > raw_spec = item->spec; > raw_mask = item->mask; > -- > 2.34.1 >
Re: [PATCH v5 1/3] net/ice: fix possible memory leak
On Mon, Jul 22, 2024 at 04:09:15PM +0100, Bruce Richardson wrote: > On Mon, Jul 22, 2024 at 01:50:44PM +, Vladimir Medvedkin wrote: > > This patch fixes possible memory leak inside the > > ice_hash_parse_raw_pattern() due to the lack of a call to rte_free() > > for previously allocated pkt_buf and msk_buf. > > > > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in > > RSS") > > Cc: sta...@dpdk.org > > > > Reported-by: Michael Theodore Stolarchuk > > Signed-off-by: Vladimir Medvedkin > Acked-by: Bruce Richardson > Series applied to dpdk-next-net-intel. Thanks, /Bruce
[PATCH v2] power: fix number of uncore freqs
The number of uncore frequencies was defined in three places, and two of them were too small leading to test failures. All places should be using RTE_MAX_UNCORE_FREQS. Bugzilla ID: 1499 Fixes: 60b8a661a957 ("power: add Intel uncore frequency control") Signed-off-by: Stephen Hemminger --- v2 - drop the debug log changes app/test/test_power_intel_uncore.c | 4 +--- lib/power/power_intel_uncore.c | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/app/test/test_power_intel_uncore.c b/app/test/test_power_intel_uncore.c index 80b45ce46e..049658627d 100644 --- a/app/test/test_power_intel_uncore.c +++ b/app/test/test_power_intel_uncore.c @@ -17,14 +17,12 @@ test_power_intel_uncore(void) #include #include -#define MAX_UNCORE_FREQS 32 - #define VALID_PKG 0 #define VALID_DIE 0 #define INVALID_PKG (rte_power_uncore_get_num_pkgs() + 1) #define INVALID_DIE (rte_power_uncore_get_num_dies(VALID_PKG) + 1) #define VALID_INDEX 1 -#define INVALID_INDEX (MAX_UNCORE_FREQS + 1) +#define INVALID_INDEX (RTE_MAX_UNCORE_FREQS + 1) static int check_power_uncore_init(void) { diff --git a/lib/power/power_intel_uncore.c b/lib/power/power_intel_uncore.c index 9c152e4ed2..0090ddd374 100644 --- a/lib/power/power_intel_uncore.c +++ b/lib/power/power_intel_uncore.c @@ -11,7 +11,6 @@ #include "power_intel_uncore.h" #include "power_common.h" -#define MAX_UNCORE_FREQS 32 #define MAX_NUMA_DIE 8 #define BUS_FREQ 10 #define FILTER_LENGTH 18 @@ -32,7 +31,7 @@ struct __rte_cache_aligned uncore_power_info { unsigned int die; /* Core die id */ unsigned int pkg; /* Package id */ - uint32_t freqs[MAX_UNCORE_FREQS]; /* Frequency array */ + uint32_t freqs[RTE_MAX_UNCORE_FREQS]; /* Frequency array */ uint32_t nb_freqs; /* Number of available freqs */ FILE *f_cur_min; /* FD of scaling_min */ FILE *f_cur_max; /* FD of scaling_max */ @@ -221,7 +220,7 @@ power_get_available_uncore_freqs(struct uncore_power_info *ui) uint32_t i, num_uncore_freqs = 0; num_uncore_freqs = (ui->init_max_freq - ui->init_min_freq) / BUS_FREQ + 1; - if (num_uncore_freqs >= MAX_UNCORE_FREQS) { + if (num_uncore_freqs >= RTE_MAX_UNCORE_FREQS) { POWER_LOG(ERR, "Too many available uncore frequencies: %d", num_uncore_freqs); goto out; -- 2.43.0
Re: [PATCH] app/testpmd: fix build on signed comparison
On 7/22/2024 11:52 AM, Ferruh Yigit wrote: > Build error: > .../app/test-pmd/config.c: In function 'icmp_echo_config_setup': > .../app/test-pmd/config.c:5159:30: >error: comparison between signed and unsigned integer expressions > [-Werror=sign-compare] > if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores) > ^ > All 'nb_txq', 'nb_fwd_ports' & 'nb_fwd_lcores' are unsigned variables, > but the warning is related to the integer promotion rules of C: > 'nb_txq' -> uint16_t, promoted to 'int' > 'nb_fwd_ports' -> uint16_t, promoted to 'int' > (nb_txq * nb_fwd_ports) -> result 'int' > nb_fwd_lcores -> 'uint32_t' > Ends up comparing 'int' vs 'uint32_t'. > > Fixing by adding the casting back which was initially part of the patch. > > Fixes: 2bf44dd14fa5 ("app/testpmd: fix lcore ID restriction") > Cc: sta...@dpdk.org > > Reported-by: Raslan Darawsheh > Signed-off-by: Ferruh Yigit > Recheck-request: iol-unit-amd64-testing
[v3 00/30] NXP DMA driver fixes and Enhancements
V3 changes: * fix 32 bit compilation issue V2 changes: * fix compilation issue on ubuntu 22.04 Gagandeep Singh (6): dma/dpaa: support multi channels dma/dpaa: fix job enqueue dma/dpaa: add workaround for ERR050757 dma/dpaa: qdma stall workaround for ERR050265 dma/dpaa: remove unwanted desc dma/dpaa: data path optimization Hemant Agrawal (1): bus/dpaa: add port bmi stats Jun Yang (22): dma/dpaa2: configure route by port by PCIe port param dma/dpaa2: support multiple HW queues dma/dpaa2: adapt DMA driver API dma/dpaa2: multiple process support dma/dpaa2: add sanity check for SG entry dma/dpaa2: include DPAA2 specific header files dma/dpaa2: borrow flags of DMA operation to pass job context bus/fslmc: enhance the qbman dq storage logic dma/dpaa2: add short FD support dma/dpaa2: limit the max descriptor number dma/dpaa2: change the DMA copy return value dma/dpaa2: move the qdma header to common place dma/dpaa: refactor driver dma/dpaa: dequeue status queue dma/dpaa: add Scatter Gather support dma/dpaa: block dequeue dma/dpaa: improve congestion handling dma/dpaa: disable SG descriptor as default dma/dpaa: improve ERRATA workaround solution dma/dpaa: improve silent mode support dma/dpaa: support multiple SG copies dma/dpaa: support max SG entry size Vanshika Shukla (1): dma/dpaa: add burst capacity API config/arm/meson.build|4 +- doc/api/doxy-api-index.md |2 +- doc/api/doxy-api.conf.in |2 +- doc/guides/dmadevs/dpaa.rst |3 + drivers/bus/dpaa/base/fman/fman_hw.c | 65 +- drivers/bus/dpaa/include/fman.h |4 +- drivers/bus/dpaa/include/fsl_fman.h | 12 + drivers/bus/dpaa/version.map |4 + drivers/bus/fslmc/portal/dpaa2_hw_dpci.c | 25 +- drivers/bus/fslmc/portal/dpaa2_hw_dpio.c |7 +- drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 38 +- .../bus/fslmc/qbman/include/fsl_qbman_base.h | 29 +- drivers/common/dpaax/meson.build |3 +- drivers/common/dpaax/rte_pmd_dpaax_qdma.h | 23 + drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 23 +- drivers/crypto/dpaa2_sec/dpaa2_sec_raw_dp.c |4 +- drivers/dma/dpaa/dpaa_qdma.c | 1641 +++- drivers/dma/dpaa/dpaa_qdma.h | 289 +- drivers/dma/dpaa2/dpaa2_qdma.c| 2381 + drivers/dma/dpaa2/dpaa2_qdma.h| 243 +- drivers/dma/dpaa2/meson.build |4 +- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h| 177 -- drivers/dma/dpaa2/version.map | 14 - drivers/net/dpaa/dpaa_ethdev.c| 46 +- drivers/net/dpaa/dpaa_ethdev.h| 12 + drivers/net/dpaa2/dpaa2_ethdev.c | 81 +- drivers/net/dpaa2/dpaa2_rxtx.c| 19 +- drivers/raw/dpaa2_cmdif/dpaa2_cmdif.c |4 +- 28 files changed, 2856 insertions(+), 2303 deletions(-) create mode 100644 drivers/common/dpaax/rte_pmd_dpaax_qdma.h delete mode 100644 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h delete mode 100644 drivers/dma/dpaa2/version.map -- 2.25.1
[v3 01/30] dma/dpaa2: configure route by port by PCIe port param
From: Jun Yang struct { uint64_t coreid : 4; /**--rbp.sportid / rbp.dportid*/ uint64_t pfid : 8; /**--rbp.spfid / rbp.dpfid*/ uint64_t vfen : 1; /**--rbp.svfa / rbp.dvfa*/ uint64_t vfid : 16; /**--rbp.svfid / rbp.dvfid*/ . } pcie; Signed-off-by: Jun Yang --- .../bus/fslmc/qbman/include/fsl_qbman_base.h | 29 ++--- drivers/dma/dpaa2/dpaa2_qdma.c| 59 +-- drivers/dma/dpaa2/dpaa2_qdma.h| 38 +++- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h| 55 + drivers/dma/dpaa2/version.map | 1 - 5 files changed, 100 insertions(+), 82 deletions(-) diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h index 48ffb1b46e..7528b610e1 100644 --- a/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h +++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_base.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 2014 Freescale Semiconductor, Inc. - * Copyright 2017-2019 NXP + * Copyright 2017-2024 NXP * */ #ifndef _FSL_QBMAN_BASE_H @@ -141,12 +141,23 @@ struct qbman_fd { uint32_t saddr_hi; uint32_t len_sl:18; - uint32_t rsv1:14; - + uint32_t rsv13:2; + uint32_t svfid:6; + uint32_t rsv12:2; + uint32_t spfid:2; + uint32_t rsv1:2; uint32_t sportid:4; - uint32_t rsv2:22; + uint32_t rsv2:1; + uint32_t sca:1; + uint32_t sat:2; + uint32_t sattr:3; + uint32_t svfa:1; + uint32_t stc:3; uint32_t bmt:1; - uint32_t rsv3:1; + uint32_t dvfid:6; + uint32_t rsv3:2; + uint32_t dpfid:2; + uint32_t rsv31:2; uint32_t fmt:2; uint32_t sl:1; uint32_t rsv4:1; @@ -154,12 +165,14 @@ struct qbman_fd { uint32_t acc_err:4; uint32_t rsv5:4; uint32_t ser:1; - uint32_t rsv6:3; + uint32_t rsv6:2; + uint32_t wns:1; uint32_t wrttype:4; uint32_t dqos:3; uint32_t drbp:1; uint32_t dlwc:2; - uint32_t rsv7:2; + uint32_t rsv7:1; + uint32_t rns:1; uint32_t rdttype:4; uint32_t sqos:3; uint32_t srbp:1; @@ -182,7 +195,7 @@ struct qbman_fd { uint32_t saddr_lo; uint32_t saddr_hi:17; - uint32_t rsv1:15; + uint32_t rsv1_att:15; uint32_t len; diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 2c91ceec13..5954b552b5 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -22,7 +22,7 @@ uint32_t dpaa2_coherent_alloc_cache; static inline int qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest, uint32_t len, struct qbman_fd *fd, -struct rte_dpaa2_qdma_rbp *rbp, int ser) +struct dpaa2_qdma_rbp *rbp, int ser) { fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src)); fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src)); @@ -93,7 +93,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest, static void dpaa2_qdma_populate_fle(struct qbman_fle *fle, uint64_t fle_iova, - struct rte_dpaa2_qdma_rbp *rbp, + struct dpaa2_qdma_rbp *rbp, uint64_t src, uint64_t dest, size_t len, uint32_t flags, uint32_t fmt) { @@ -114,7 +114,6 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle, /* source */ sdd->read_cmd.portid = rbp->sportid; sdd->rbpcmd_simple.pfid = rbp->spfid; - sdd->rbpcmd_simple.vfa = rbp->vfa; sdd->rbpcmd_simple.vfid = rbp->svfid; if (rbp->srbp) { @@ -127,7 +126,6 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle, /* destination */ sdd->write_cmd.portid = rbp->dportid; sdd->rbpcmd_simple.pfid = rbp->dpfid; - sdd->rbpcmd_simple.vfa = rbp->vfa; sdd->rbpcmd_simple.vfid = rbp->dvfid; if (rbp->drbp) { @@ -178,7 +176,7 @@ dpdmai_dev_set_fd_us(struct qdma_virt_queue *qdma_vq,
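The struct fragment at the top of the commit message reads more easily laid out as a bitfield; the struct name below is illustrative, and the trailing "." in the original marks further fields this sketch omits:

#include <stdint.h>

/* Per-direction PCIe attributes packed into the port parameter; each
 * field maps onto the corresponding source/destination rbp field. */
struct pcie_port_param {
	uint64_t coreid : 4;  /* rbp.sportid / rbp.dportid */
	uint64_t pfid   : 8;  /* rbp.spfid   / rbp.dpfid   */
	uint64_t vfen   : 1;  /* rbp.svfa    / rbp.dvfa    */
	uint64_t vfid   : 16; /* rbp.svfid   / rbp.dvfid   */
	/* remaining bits elided in the commit message */
};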
[v3 02/30] dma/dpaa2: support multiple HW queues
From: Jun Yang Initialize and Configure queues of dma device according to hw queues supported from mc bus. Because multiple queues per device are supported, virt queues implementation are dropped. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 312 +++-- drivers/dma/dpaa2/dpaa2_qdma.h | 6 +- 2 files changed, 140 insertions(+), 178 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 5954b552b5..945ba71e4a 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -478,9 +478,9 @@ dpdmai_dev_get_job_us(struct qdma_virt_queue *qdma_vq __rte_unused, static inline uint16_t dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq, -const struct qbman_fd *fd, -struct rte_dpaa2_qdma_job **job, -uint16_t *nb_jobs) + const struct qbman_fd *fd, + struct rte_dpaa2_qdma_job **job, + uint16_t *nb_jobs) { struct qbman_fle *fle; struct rte_dpaa2_qdma_job **ppjob = NULL; @@ -512,9 +512,9 @@ dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq, static inline uint16_t dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq, -const struct qbman_fd *fd, -struct rte_dpaa2_qdma_job **job, -uint16_t *nb_jobs) + const struct qbman_fd *fd, + struct rte_dpaa2_qdma_job **job, + uint16_t *nb_jobs) { struct qbman_fle *fle; struct rte_dpaa2_qdma_job **ppjob = NULL; @@ -548,12 +548,12 @@ dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq, /* Function to receive a QDMA job for a given device and queue*/ static int dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, -uint16_t *vq_id, -struct rte_dpaa2_qdma_job **job, -uint16_t nb_jobs) + uint16_t *vq_id, + struct rte_dpaa2_qdma_job **job, + uint16_t nb_jobs) { struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev; - struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]); + struct dpaa2_queue *rxq; struct qbman_result *dq_storage, *dq_storage1 = NULL; struct qbman_pull_desc pulldesc; struct qbman_swp *swp; @@ -562,7 +562,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, uint8_t num_rx = 0; const struct qbman_fd *fd; uint16_t vqid, num_rx_ret; - uint16_t rx_fqid = rxq->fqid; + uint16_t rx_fqid; int ret, pull_size; if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) { @@ -575,15 +575,17 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, if (unlikely(!DPAA2_PER_LCORE_DPIO)) { ret = dpaa2_affine_qbman_swp(); if (ret) { - DPAA2_QDMA_ERR( - "Failed to allocate IO portal, tid: %d\n", + DPAA2_QDMA_ERR("Failed to allocate IO portal, tid(%d)", rte_gettid()); return 0; } } swp = DPAA2_PER_LCORE_PORTAL; + rxq = &dpdmai_dev->rx_queue[qdma_vq->vq_id]; + rx_fqid = rxq->fqid; - pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs; + pull_size = (nb_jobs > dpaa2_dqrr_size) ? 
+ dpaa2_dqrr_size : nb_jobs; q_storage = rxq->q_storage; if (unlikely(!q_storage->active_dqs)) { @@ -697,12 +699,12 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq, static int dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, - uint16_t *vq_id, - struct rte_dpaa2_qdma_job **job, - uint16_t nb_jobs) + uint16_t *vq_id, + struct rte_dpaa2_qdma_job **job, + uint16_t nb_jobs) { struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev; - struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]); + struct dpaa2_queue *rxq; struct qbman_result *dq_storage; struct qbman_pull_desc pulldesc; struct qbman_swp *swp; @@ -710,7 +712,7 @@ dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, uint8_t num_rx = 0; const struct qbman_fd *fd; uint16_t vqid, num_rx_ret; - uint16_t rx_fqid = rxq->fqid; + uint16_t rx_fqid; int ret, next_pull, num_pulled = 0; if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) { @@ -725,15 +727,15 @@ dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq, if (unlikely(!DPAA2_PER_LCORE_DPIO)) { ret = dpaa2_affine_qbman_swp(); if (ret) { -
[v3 03/30] dma/dpaa2: adapt DMA driver API
From: Jun Yang 1) Support DMA single copy and SG copy. 2) Silent mode support. Add index combined with length field. For Silent mode, this index is used to notify DMA driver which inner descriptor should be used. For none silent mode, this index is used to notify user which descriptor is completed. In addition, because dpaa2 qdma is not able to preserve order, "rte_dma_completed_t" returns multiple indexes instead of last index. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 1667 +++- drivers/dma/dpaa2/dpaa2_qdma.h | 126 +- drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h | 119 +- drivers/dma/dpaa2/version.map | 13 - 4 files changed, 799 insertions(+), 1126 deletions(-) delete mode 100644 drivers/dma/dpaa2/version.map diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 945ba71e4a..15d3776603 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -16,218 +16,345 @@ #define DPAA2_QDMA_PREFETCH "prefetch" -uint32_t dpaa2_coherent_no_alloc_cache; -uint32_t dpaa2_coherent_alloc_cache; +static uint32_t dpaa2_coherent_no_alloc_cache; +static uint32_t dpaa2_coherent_alloc_cache; static inline int -qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest, -uint32_t len, struct qbman_fd *fd, -struct dpaa2_qdma_rbp *rbp, int ser) +qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring, + const uint16_t *elem, uint16_t nb, + uint16_t *free_space) { - fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src)); - fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src)); + if (unlikely(nb > ring->free_space)) + return 0; - fd->simple_pci.len_sl = len; + if ((ring->tail + nb) < DPAA2_QDMA_MAX_DESC) { + rte_memcpy(&ring->cntx_idx_ring[ring->tail], + elem, nb * sizeof(uint16_t)); + ring->tail += nb; + } else { + rte_memcpy(&ring->cntx_idx_ring[ring->tail], + elem, + (DPAA2_QDMA_MAX_DESC - ring->tail) * + sizeof(uint16_t)); + rte_memcpy(&ring->cntx_idx_ring[0], + &elem[DPAA2_QDMA_MAX_DESC - ring->tail], + (nb - DPAA2_QDMA_MAX_DESC + ring->tail) * + sizeof(uint16_t)); + ring->tail = (ring->tail + nb) & (DPAA2_QDMA_MAX_DESC - 1); + } + ring->free_space -= nb; + ring->nb_in_ring += nb; - fd->simple_pci.bmt = 1; - fd->simple_pci.fmt = 3; - fd->simple_pci.sl = 1; - fd->simple_pci.ser = ser; + if (free_space) + *free_space = ring->free_space; - fd->simple_pci.sportid = rbp->sportid; /*pcie 3 */ - fd->simple_pci.srbp = rbp->srbp; - if (rbp->srbp) - fd->simple_pci.rdttype = 0; - else - fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache; + return nb; +} - /*dest is pcie memory */ - fd->simple_pci.dportid = rbp->dportid; /*pcie 3 */ - fd->simple_pci.drbp = rbp->drbp; - if (rbp->drbp) - fd->simple_pci.wrttype = 0; - else - fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache; +static inline int +qdma_cntx_idx_ring_dq(struct qdma_cntx_idx_ring *ring, + uint16_t *elem, uint16_t max) +{ + int ret = ring->nb_in_ring > max ? 
max : ring->nb_in_ring; - fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest)); - fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest)); + if (!ret) + return 0; - return 0; + if ((ring->start + ret) < DPAA2_QDMA_MAX_DESC) { + rte_memcpy(elem, + &ring->cntx_idx_ring[ring->start], + ret * sizeof(uint16_t)); + ring->start += ret; + } else { + rte_memcpy(elem, + &ring->cntx_idx_ring[ring->start], + (DPAA2_QDMA_MAX_DESC - ring->start) * + sizeof(uint16_t)); + rte_memcpy(&elem[DPAA2_QDMA_MAX_DESC - ring->start], + &ring->cntx_idx_ring[0], + (ret - DPAA2_QDMA_MAX_DESC + ring->start) * + sizeof(uint16_t)); + ring->start = (ring->start + ret) & (DPAA2_QDMA_MAX_DESC - 1); + } + ring->free_space += ret; + ring->nb_in_ring -= ret; + + return ret; } -static inline int -qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest, -uint32_t len, struct qbman_fd *fd, int ser) +static int +dpaa2_qdma_multi_eq(struct qdma_virt_queue *qdma_vq) { - fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src)); - fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src)); - - fd->simple_ddr.len = len; - - fd->simple_ddr.bmt = 1
[v3 04/30] dma/dpaa2: multiple process support
From: Jun Yang Support multiple processes for dpaa2 dma. 1) Move queue configuration procedure from init function to device configuration function which is called by user. 2) Instances of dpaa2_dpdmai_dev and qdma_device are allocated from primary process and shared between multiple processes. 3) MC reg is per process mapped. 4) User is responsible to check vq number configured before using dma device to identify if this device is occupied by other process. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 405 - drivers/dma/dpaa2/dpaa2_qdma.h | 6 +- 2 files changed, 254 insertions(+), 157 deletions(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 15d3776603..44b82c139e 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2018-2022 NXP + * Copyright 2018-2023 NXP */ #include @@ -19,6 +19,8 @@ static uint32_t dpaa2_coherent_no_alloc_cache; static uint32_t dpaa2_coherent_alloc_cache; +static struct fsl_mc_io s_proc_mc_reg; + static inline int qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring, const uint16_t *elem, uint16_t nb, @@ -960,6 +962,9 @@ dpaa2_qdma_info_get(const struct rte_dma_dev *dev, dev_info->max_vchans = dpdmai_dev->num_queues; dev_info->max_desc = DPAA2_QDMA_MAX_DESC; dev_info->min_desc = DPAA2_QDMA_MIN_DESC; + dev_info->dev_name = dev->device->name; + if (dpdmai_dev->qdma_dev) + dev_info->nb_vchans = dpdmai_dev->qdma_dev->num_vqs; return 0; } @@ -969,25 +974,102 @@ dpaa2_qdma_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *dev_conf, uint32_t conf_sz) { - char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */ struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private; struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev; uint16_t i; + struct dpdmai_rx_queue_cfg rx_queue_cfg; + struct dpdmai_rx_queue_attr rx_attr; + struct dpdmai_tx_queue_attr tx_attr; + struct dpaa2_queue *rxq; + int ret = 0; DPAA2_QDMA_FUNC_TRACE(); RTE_SET_USED(conf_sz); - /* In case QDMA device is not in stopped state, return -EBUSY */ - if (qdma_dev->state == 1) { - DPAA2_QDMA_ERR("%s Not stopped, configure failed.", - dev->data->dev_name); - return -EBUSY; + if (dev_conf->nb_vchans > dpdmai_dev->num_queues) { + DPAA2_QDMA_ERR("%s config queues(%d) > hw queues(%d)", + dev->data->dev_name, dev_conf->nb_vchans, + dpdmai_dev->num_queues); + + return -ENOTSUP; + } + + if (qdma_dev->vqs) { + DPAA2_QDMA_DEBUG("%s: queues de-config(%d)/re-config(%d)", + dev->data->dev_name, + qdma_dev->num_vqs, dev_conf->nb_vchans); + for (i = 0; i < qdma_dev->num_vqs; i++) { + if ((qdma_dev->vqs[i].num_enqueues != + qdma_dev->vqs[i].num_dequeues) && + !qdma_dev->is_silent) { + DPAA2_QDMA_ERR("VQ(%d) %"PRIu64" jobs in dma.", + i, qdma_dev->vqs[i].num_enqueues - + qdma_dev->vqs[i].num_dequeues); + return -EBUSY; + } + } + for (i = 0; i < qdma_dev->num_vqs; i++) { + if (qdma_dev->vqs[i].fle_pool) { + rte_mempool_free(qdma_dev->vqs[i].fle_pool); + qdma_dev->vqs[i].fle_pool = NULL; + } + if (qdma_dev->vqs[i].ring_cntx_idx) { + rte_free(qdma_dev->vqs[i].ring_cntx_idx); + qdma_dev->vqs[i].ring_cntx_idx = NULL; + } + rxq = &dpdmai_dev->rx_queue[i]; + if (rxq->q_storage) { + DPAA2_QDMA_DEBUG("%s rxq[%d] re-configure", + dev->data->dev_name, i); + dpaa2_free_dq_storage(rxq->q_storage); + rte_free(rxq->q_storage); + rxq->q_storage = NULL; + } + } + rte_free(qdma_dev->vqs); + qdma_dev->vqs = NULL; + qdma_dev->num_vqs = 0; + } + + /* Set up Rx Queues */ + for (i = 0; i < 
dev_conf->nb_vchans; i++) { + memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg)); + rxq = &dpdmai_dev->rx_queue[i]; + ret = dpdmai_set_rx_queue(&s_proc_mc_reg, +
[v3 05/30] dma/dpaa2: add sanity check for SG entry
From: Jun Yang Make sure the SG entry number doesn't overflow. Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 44b82c139e..7f6ebcb46b 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -615,8 +615,17 @@ dpaa2_qdma_copy_sg(void *dev_private, struct qbman_fle *fle; struct qdma_sdd *sdd; - if (unlikely(nb_src != nb_dst)) + if (unlikely(nb_src != nb_dst)) { + DPAA2_QDMA_ERR("SG entry src num(%d) != dst num(%d)", + nb_src, nb_dst); return -ENOTSUP; + } + + if (unlikely(nb_src > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)) { + DPAA2_QDMA_ERR("SG entry number(%d) > MAX(%d)", + nb_src, RTE_DPAA2_QDMA_JOB_SUBMIT_MAX); + return -EINVAL; + } memset(fd, 0, sizeof(struct qbman_fd)); -- 2.25.1
[v3 06/30] dma/dpaa2: include DPAA2 specific header files
From: Jun Yang Include dpaa2_hw_pvt.h and dpaa2_hw_dpio.h files Signed-off-by: Jun Yang --- drivers/dma/dpaa2/dpaa2_qdma.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h index 743a43fa14..eb02bff08f 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.h +++ b/drivers/dma/dpaa2/dpaa2_qdma.h @@ -5,6 +5,9 @@ #ifndef _DPAA2_QDMA_H_ #define _DPAA2_QDMA_H_ +#include "portal/dpaa2_hw_pvt.h" +#include "portal/dpaa2_hw_dpio.h" + #define DPAA2_QDMA_MAX_DESC4096 #define DPAA2_QDMA_MIN_DESC1 #define DPAA2_QDMA_MAX_VHANS 64 -- 2.25.1
[v3 07/30] dma/dpaa2: borrow flags of DMA operation to pass job context
From: Jun Yang

For copy_sg: pass the job index list.
For copy: pass the job index.

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa2/dpaa2_qdma.c         | 92 ++
 drivers/dma/dpaa2/dpaa2_qdma.h         |  7 ++
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h | 15 -
 3 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 7f6ebcb46b..7de4894b35 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -280,25 +280,22 @@ sg_entry_post_populate(const struct rte_dma_sge *src,
     const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
     uint16_t nb_sge)
 {
-    uint16_t i = 0, idx;
-    uint32_t total_len = 0, len;
+    uint16_t i;
+    uint32_t total_len = 0;
     struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
     struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
 
     for (i = 0; i < (nb_sge - 1); i++) {
         if (unlikely(src[i].length != dst[i].length))
             return -ENOTSUP;
-        len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
-        idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
         src_sge->addr_lo = (uint32_t)src[i].addr;
         src_sge->addr_hi = (src[i].addr >> 32);
-        src_sge->data_len.data_len_sl0 = len;
+        src_sge->data_len.data_len_sl0 = src[i].length;
         dst_sge->addr_lo = (uint32_t)dst[i].addr;
         dst_sge->addr_hi = (dst[i].addr >> 32);
-        dst_sge->data_len.data_len_sl0 = len;
-        total_len += len;
-        sg_cntx->cntx_idx[i] = idx;
+        dst_sge->data_len.data_len_sl0 = dst[i].length;
+        total_len += dst[i].length;
 
         src_sge->ctrl.f = 0;
         dst_sge->ctrl.f = 0;
@@ -309,19 +306,15 @@ sg_entry_post_populate(const struct rte_dma_sge *src,
 
     if (unlikely(src[i].length != dst[i].length))
         return -ENOTSUP;
-    len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
-    idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
-
     src_sge->addr_lo = (uint32_t)src[i].addr;
     src_sge->addr_hi = (src[i].addr >> 32);
-    src_sge->data_len.data_len_sl0 = len;
+    src_sge->data_len.data_len_sl0 = src[i].length;
     dst_sge->addr_lo = (uint32_t)dst[i].addr;
     dst_sge->addr_hi = (dst[i].addr >> 32);
-    dst_sge->data_len.data_len_sl0 = len;
+    dst_sge->data_len.data_len_sl0 = dst[i].length;
 
-    total_len += len;
-    sg_cntx->cntx_idx[i] = idx;
+    total_len += dst[i].length;
     sg_cntx->job_nb = nb_sge;
 
     src_sge->ctrl.f = QDMA_SG_F;
@@ -343,20 +336,18 @@ sg_entry_populate(const struct rte_dma_sge *src,
     const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
     uint16_t nb_sge)
 {
-    uint16_t i, idx;
-    uint32_t total_len = 0, len;
+    uint16_t i;
+    uint32_t total_len = 0;
     struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
     struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
 
     for (i = 0; i < nb_sge; i++) {
         if (unlikely(src[i].length != dst[i].length))
             return -ENOTSUP;
-        len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
-        idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
         src_sge->addr_lo = (uint32_t)src[i].addr;
         src_sge->addr_hi = (src[i].addr >> 32);
-        src_sge->data_len.data_len_sl0 = len;
+        src_sge->data_len.data_len_sl0 = src[i].length;
         src_sge->ctrl.sl = QDMA_SG_SL_LONG;
         src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -366,7 +357,7 @@ sg_entry_populate(const struct rte_dma_sge *src,
 #endif
         dst_sge->addr_lo = (uint32_t)dst[i].addr;
         dst_sge->addr_hi = (dst[i].addr >> 32);
-        dst_sge->data_len.data_len_sl0 = len;
+        dst_sge->data_len.data_len_sl0 = dst[i].length;
         dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
         dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -374,8 +365,7 @@ sg_entry_populate(const struct rte_dma_sge *src,
 #else
         dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
 #endif
-        total_len += len;
-        sg_cntx->cntx_idx[i] = idx;
+        total_len += src[i].length;
 
         if (i == (nb_sge - 1)) {
             src_sge->ctrl.f = QDMA_SG_F;
@@ -606,14 +596,15 @@ dpaa2_qdma_copy_sg(void *dev_private,
     struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
     struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
     struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-    int ret = 0, expected;
-    uint32_t cntx_idx, len;
+    int ret = 0, expected, i;
+    uint32_t len;
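For context, a minimal sketch of how a caller would now hand the job
index to the driver through the standard rte_dma_copy() flags argument
instead of packing it into the 32-bit length. EXAMPLE_COPY_IDX_OFFSET
and the encoding below are assumptions made for this sketch only; the
driver's public header (rte_pmd_dpaa2_qdma.h) defines the real layout.

#include <rte_dmadev.h>

/* Hypothetical encoding for illustration: the 64-bit flags word carries
 * the job context, so the full 32-bit length range stays usable.
 */
#define EXAMPLE_COPY_IDX_OFFSET 8

static int
submit_copy_with_job_idx(int16_t dev_id, uint16_t vchan,
	rte_iova_t src, rte_iova_t dst, uint32_t len, uint16_t job_idx)
{
	uint64_t flags = RTE_DMA_OP_FLAG_SUBMIT |
		((uint64_t)job_idx << EXAMPLE_COPY_IDX_OFFSET);

	/* rte_dma_copy() returns the ring index on success (>= 0). */
	return rte_dma_copy(dev_id, vchan, src, dst, len, flags);
}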
[v3 08/30] bus/fslmc: enhance the qbman dq storage logic
From: Jun Yang

Multiple DQ storages are used across multiple cores. The single DQ
storage in the union leaks when the multiple storages are allocated.
Keeping the single DQ storage in the union makes no sense, so remove
it and reuse the first entry of the multiple-storage array for that
case.

Signed-off-by: Jun Yang
---
 drivers/bus/fslmc/portal/dpaa2_hw_dpci.c    | 25 ++-
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c    |  7 +-
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     | 38 +-
 drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 23 ++
 drivers/crypto/dpaa2_sec/dpaa2_sec_raw_dp.c |  4 +-
 drivers/dma/dpaa2/dpaa2_qdma.c              | 43 ++-
 drivers/net/dpaa2/dpaa2_ethdev.c            | 81 -
 drivers/net/dpaa2/dpaa2_rxtx.c              | 19 +++--
 drivers/raw/dpaa2_cmdif/dpaa2_cmdif.c       |  4 +-
 9 files changed, 103 insertions(+), 141 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c
index 07256ed7ec..160126f6d6 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpci.c
@@ -81,22 +81,10 @@ rte_dpaa2_create_dpci_device(int vdev_fd __rte_unused,
         }
 
         /* Allocate DQ storage for the DPCI Rx queues */
-        rxq = &(dpci_node->rx_queue[i]);
-        rxq->q_storage = rte_malloc("dq_storage",
-            sizeof(struct queue_storage_info_t),
-            RTE_CACHE_LINE_SIZE);
-        if (!rxq->q_storage) {
-            DPAA2_BUS_ERR("q_storage allocation failed\n");
-            ret = -ENOMEM;
+        rxq = &dpci_node->rx_queue[i];
+        ret = dpaa2_queue_storage_alloc(rxq, 1);
+        if (ret)
             goto err;
-        }
-
-        memset(rxq->q_storage, 0, sizeof(struct queue_storage_info_t));
-        ret = dpaa2_alloc_dq_storage(rxq->q_storage);
-        if (ret) {
-            DPAA2_BUS_ERR("dpaa2_alloc_dq_storage failed\n");
-            goto err;
-        }
     }
 
     /* Enable the device */
@@ -141,12 +129,9 @@ rte_dpaa2_create_dpci_device(int vdev_fd __rte_unused,
 
 err:
     for (i = 0; i < DPAA2_DPCI_MAX_QUEUES; i++) {
-        struct dpaa2_queue *rxq = &(dpci_node->rx_queue[i]);
+        struct dpaa2_queue *rxq = &dpci_node->rx_queue[i];
 
-        if (rxq->q_storage) {
-            dpaa2_free_dq_storage(rxq->q_storage);
-            rte_free(rxq->q_storage);
-        }
+        dpaa2_queue_storage_free(rxq, 1);
     }
     rte_free(dpci_node);
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 4aec7b2cd8..a8afc772fd 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -574,6 +574,7 @@ dpaa2_free_dq_storage(struct queue_storage_info_t *q_storage)
 
     for (i = 0; i < NUM_DQS_PER_QUEUE; i++) {
         rte_free(q_storage->dq_storage[i]);
+        q_storage->dq_storage[i] = NULL;
     }
 }
 
@@ -583,7 +584,7 @@ dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage)
     int i = 0;
 
     for (i = 0; i < NUM_DQS_PER_QUEUE; i++) {
-        q_storage->dq_storage[i] = rte_malloc(NULL,
+        q_storage->dq_storage[i] = rte_zmalloc(NULL,
             dpaa2_dqrr_size * sizeof(struct qbman_result),
             RTE_CACHE_LINE_SIZE);
         if (!q_storage->dq_storage[i])
@@ -591,8 +592,10 @@ dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage)
     }
     return 0;
 fail:
-    while (--i >= 0)
+    while (--i >= 0) {
         rte_free(q_storage->dq_storage[i]);
+        q_storage->dq_storage[i] = NULL;
+    }
     return -1;
 }
 
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 169c7917ea..1ce481c88d 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016-2021 NXP
+ *   Copyright 2016-2024 NXP
  *
  */
 
@@ -165,7 +165,9 @@ struct __rte_cache_aligned dpaa2_queue {
     uint64_t tx_pkts;
     uint64_t err_pkts;
     union {
-        struct queue_storage_info_t *q_storage;
+        /**Ingress*/
+        struct queue_storage_info_t *q_storage[RTE_MAX_LCORE];
+        /**Egress*/
         struct qbman_result *cscn;
     };
     struct rte_event ev;
@@ -186,6 +188,38 @@ struct swp_active_dqs {
     uint64_t reserved[7];
 };
 
+#define dpaa2_queue_storage_alloc(q, num) \
+({ \
+    int ret = 0, i; \
+    \
+    for (i = 0; i < (num); i+
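The dpaa2_queue_storage_alloc() helper is cut off above. As a reading
aid, here is a minimal sketch of what such a helper plausibly does,
written as a function rather than the statement-expression macro the
patch adds. The loop body and error handling are assumptions inferred
from the open-coded sequence this patch removes from dpaa2_hw_dpci.c;
dpaa2_alloc_dq_storage()/dpaa2_free_dq_storage() and the struct types
come from dpaa2_hw_pvt.h.

#include <errno.h>
#include <rte_malloc.h>

static int
example_queue_storage_alloc(struct dpaa2_queue *q, int num)
{
	int i, ret;

	for (i = 0; i < num; i++) {
		/* One storage per lcore slot, zeroed like the old memset. */
		q->q_storage[i] = rte_zmalloc(NULL,
			sizeof(struct queue_storage_info_t),
			RTE_CACHE_LINE_SIZE);
		if (q->q_storage[i] == NULL)
			goto fail;
		ret = dpaa2_alloc_dq_storage(q->q_storage[i]);
		if (ret != 0)
			goto fail;
	}
	return 0;
fail:
	/* Unwind, including the partially set up entry at index i. */
	for (; i >= 0; i--) {
		if (q->q_storage[i] != NULL) {
			dpaa2_free_dq_storage(q->q_storage[i]);
			rte_free(q->q_storage[i]);
			q->q_storage[i] = NULL;
		}
	}
	return -ENOMEM;
}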
[v3 09/30] dma/dpaa2: add short FD support
From: Jun Yang

A short FD can be used for the single-transfer scenario, where it
shows higher performance than the FLE format.
1) Save the index context in the FD att field for both short FD and
   (non-SG) FLE.
2) Identify the FD type by the att field of the FD.
3) Force 48-bit addresses for the source address and the FLE,
   according to the spec.

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa2/dpaa2_qdma.c         | 314 +++--
 drivers/dma/dpaa2/dpaa2_qdma.h         |  69 --
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h |  13 -
 3 files changed, 285 insertions(+), 111 deletions(-)

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 53caccecd7..d1358b686c 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -522,7 +522,6 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
     const struct qdma_cntx_fle_sdd *fle_sdd;
     const struct qdma_sdd *sdd;
     const struct qdma_cntx_sg *cntx_sg = NULL;
-    const struct qdma_cntx_long *cntx_long = NULL;
 
     fle_sdd = container_of(fle, const struct qdma_cntx_fle_sdd, fle[0]);
     sdd = fle_sdd->sdd;
@@ -545,11 +544,8 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
         QBMAN_FLE_WORD4_FMT_SGE) {
         cntx_sg = container_of(fle_sdd, const struct qdma_cntx_sg,
             fle_sdd);
-    } else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+    } else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt !=
         QBMAN_FLE_WORD4_FMT_SBF) {
-        cntx_long = container_of(fle_sdd, const struct qdma_cntx_long,
-            fle_sdd);
-    } else {
         DPAA2_QDMA_ERR("Unsupported fle format:%d",
             fle[DPAA2_QDMA_SRC_FLE].word4.fmt);
         return;
@@ -560,11 +556,6 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
         dpaa2_qdma_sdd_dump(&sdd[i]);
     }
 
-    if (cntx_long) {
-        DPAA2_QDMA_INFO("long format/Single buffer cntx idx:%d",
-            cntx_long->cntx_idx);
-    }
-
     if (cntx_sg) {
         DPAA2_QDMA_INFO("long format/SG format, job number:%d",
             cntx_sg->job_nb);
@@ -582,6 +573,8 @@ dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
             DPAA2_QDMA_INFO("cntx_idx[%d]:%d", i,
                 cntx_sg->cntx_idx[i]);
         }
+    } else {
+        DPAA2_QDMA_INFO("long format/Single buffer cntx");
     }
 }
 
@@ -644,7 +637,7 @@ dpaa2_qdma_copy_sg(void *dev_private,
         offsetof(struct qdma_cntx_sg, fle_sdd) +
         offsetof(struct qdma_cntx_fle_sdd, fle);
 
-    DPAA2_SET_FD_ADDR(fd, fle_iova);
+    dpaa2_qdma_fd_set_addr(fd, fle_iova);
     DPAA2_SET_FD_COMPOUND_FMT(fd);
     DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_sg);
 
@@ -680,6 +673,7 @@ dpaa2_qdma_copy_sg(void *dev_private,
     if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
         dpaa2_qdma_long_fmt_dump(cntx_sg->fle_sdd.fle);
 
+    dpaa2_qdma_fd_save_att(fd, 0, DPAA2_QDMA_FD_SG);
     qdma_vq->fd_idx++;
     qdma_vq->silent_idx =
         (qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1);
@@ -696,74 +690,178 @@ dpaa2_qdma_copy_sg(void *dev_private,
 
     return ret;
 }
 
+static inline void
+qdma_populate_fd_pci(uint64_t src, uint64_t dest,
+    uint32_t len, struct qbman_fd *fd,
+    struct dpaa2_qdma_rbp *rbp, int ser)
+{
+    fd->simple_pci.saddr_lo = lower_32_bits(src);
+    fd->simple_pci.saddr_hi = upper_32_bits(src);
+
+    fd->simple_pci.len_sl = len;
+
+    fd->simple_pci.bmt = DPAA2_QDMA_BMT_DISABLE;
+    fd->simple_pci.fmt = DPAA2_QDMA_FD_SHORT_FORMAT;
+    fd->simple_pci.sl = 1;
+    fd->simple_pci.ser = ser;
+    if (ser)
+        fd->simple.frc |= QDMA_SER_CTX;
+
+    fd->simple_pci.sportid = rbp->sportid;
+
+    fd->simple_pci.svfid = rbp->svfid;
+    fd->simple_pci.spfid = rbp->spfid;
+    fd->simple_pci.svfa = rbp->svfa;
+    fd->simple_pci.dvfid = rbp->dvfid;
+    fd->simple_pci.dpfid = rbp->dpfid;
+    fd->simple_pci.dvfa = rbp->dvfa;
+
+    fd->simple_pci.srbp = rbp->srbp;
+    if (rbp->srbp)
+        fd->simple_pci.rdttype = 0;
+    else
+        fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache;
+
+    /*dest is pcie memory */
+    fd->simple_pci.dportid = rbp->dportid;
+    fd->simple_pci.drbp = rbp->drbp;
+    if (rbp->drbp)
+        fd->simple_pci.wrttype = 0;
+    else
+        fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache;
+
+    fd->simple_pci.daddr_lo = lower_32_bits(dest);
+    fd->simple_pci.daddr_hi = upper_32_bits(dest);
+}
+
+static inline void
+qdma_populate_fd_ddr(uint64_t src, uint64_t dest,
+    uint32_t len, struct qbman_fd *fd, int ser)
+{
+    fd->simple_ddr.saddr_lo = lower_32_bits(src);
+    fd->simple_ddr.saddr_hi = upper_32_bits(src);
+
+    fd->simple_ddr.len
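To make the att encoding concrete, the following standalone sketch
mirrors the scheme described above: the job index lives in the low 13
bits, the FD type in the bits above it, and addresses are clamped to
48 bits. The names and enum values here are illustrative stand-ins,
not the driver's actual definitions (those live in dpaa2_qdma.h).

#include <stdint.h>

#define EX_FD_ATT_TYPE_OFFSET 13

/* Assumed type values, for illustration only. */
enum ex_fd_type { EX_FD_SHORT = 1, EX_FD_LONG_NOSG = 2, EX_FD_SG = 3 };

static inline uint16_t
ex_fd_save_att(uint16_t job_idx, enum ex_fd_type type)
{
	/* Index in the low 13 bits, FD type above them. */
	return job_idx | (uint16_t)(type << EX_FD_ATT_TYPE_OFFSET);
}

static inline enum ex_fd_type
ex_fd_att_type(uint16_t att)
{
	/* Completion path: recover the FD type from att. */
	return (enum ex_fd_type)(att >> EX_FD_ATT_TYPE_OFFSET);
}

static inline uint64_t
ex_fd_addr_48b(uint64_t iova)
{
	/* Force a 48-bit address, as the spec requires for src/FLE. */
	return iova & ((UINT64_C(1) << 48) - 1);
}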
[v3 10/30] dma/dpaa2: limit the max descriptor number
From: Jun Yang

For the non-SG format, the index is saved in the FD with a width of
DPAA2_QDMA_FD_ATT_TYPE_OFFSET (13) bits. The max descriptor number of
a ring must be a power of 2, so the eventual maximum is:
((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) / 2), i.e. 4096.

Signed-off-by: Jun Yang
---
 drivers/dma/dpaa2/dpaa2_qdma.h | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index 0be65e1cc6..250c83c83c 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -8,8 +8,6 @@
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
-#define DPAA2_QDMA_MAX_DESC	4096
-#define DPAA2_QDMA_MIN_DESC	1
 #define DPAA2_QDMA_MAX_VHANS	64
 
 #define DPAA2_DPDMAI_MAX_QUEUES	16
@@ -169,10 +167,15 @@ enum dpaa2_qdma_fd_type {
 };
 
 #define DPAA2_QDMA_FD_ATT_TYPE_OFFSET 13
+#define DPAA2_QDMA_FD_ATT_MAX_IDX \
+	((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1)
 #define DPAA2_QDMA_FD_ATT_TYPE(att) \
 	(att >> DPAA2_QDMA_FD_ATT_TYPE_OFFSET)
 #define DPAA2_QDMA_FD_ATT_CNTX(att) \
-	(att & ((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1))
+	(att & DPAA2_QDMA_FD_ATT_MAX_IDX)
+
+#define DPAA2_QDMA_MAX_DESC	((DPAA2_QDMA_FD_ATT_MAX_IDX + 1) / 2)
+#define DPAA2_QDMA_MIN_DESC	1
 
 static inline void
 dpaa2_qdma_fd_set_addr(struct qbman_fd *fd,
@@ -186,6 +189,7 @@ static inline void
 dpaa2_qdma_fd_save_att(struct qbman_fd *fd,
 	uint16_t job_idx, enum dpaa2_qdma_fd_type type)
 {
+	RTE_ASSERT(job_idx <= DPAA2_QDMA_FD_ATT_MAX_IDX);
 	fd->simple_ddr.rsv1_att = job_idx |
 		(type << DPAA2_QDMA_FD_ATT_TYPE_OFFSET);
 }
--
2.25.1
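As a quick sanity check of the arithmetic above (a standalone snippet,
not driver code): with a 13-bit att index field, the largest storable
index is 8191, and halving the 8192 representable values gives the
4096-descriptor ceiling, which matches the hard-coded
DPAA2_QDMA_MAX_DESC value that the first hunk removes.

#include <assert.h>
#include <stdint.h>

#define ATT_TYPE_OFFSET 13
#define ATT_MAX_IDX     ((1 << ATT_TYPE_OFFSET) - 1)   /* 8191 */
#define MAX_DESC        ((ATT_MAX_IDX + 1) / 2)        /* 4096 */

int
main(void)
{
	uint16_t att, idx;

	assert(ATT_MAX_IDX == 8191);
	assert(MAX_DESC == 4096);

	/* Any index below MAX_DESC round-trips through the att field. */
	for (idx = 0; idx < MAX_DESC; idx++) {
		att = (uint16_t)(idx | (1 << ATT_TYPE_OFFSET)); /* type 1 */
		assert((att >> ATT_TYPE_OFFSET) == 1);
		assert((att & ATT_MAX_IDX) == idx);
	}
	return 0;
}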