[PATCH] net/mlx5: fix counter query during port close
Currently, the counter query service thread queries all the ports which belongs to the same sh. In case one of the ports is closing the query may still be proceeded. This commit adds the pool list in shared context to manage the pool for avoiding query the port during port close. Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS") Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.c | 3 +++ drivers/net/mlx5/mlx5.h | 2 ++ drivers/net/mlx5/mlx5_hws_cnt.c | 36 ++--- drivers/net/mlx5/mlx5_hws_cnt.h | 2 ++ 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 2cf21a1921..d6cb0d1c8a 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1814,6 +1814,9 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next); rte_spinlock_init(&sh->geneve_tlv_opt_sl); mlx5_init_shared_dev_registers(sh); + /* Init counter pool list header and lock. */ + LIST_INIT(&sh->hws_cpool_list); + rte_spinlock_init(&sh->cpool_lock); exit: pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); return sh; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index ee13ad6db2..f5eacb2c67 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1521,6 +1521,8 @@ struct mlx5_dev_ctx_shared { uint32_t host_shaper_rate:8; uint32_t lwm_triggered:1; struct mlx5_hws_cnt_svc_mng *cnt_svc; + rte_spinlock_t cpool_lock; + LIST_HEAD(hws_cpool_list, mlx5_hws_cnt_pool) hws_cpool_list; /* Count pool list. */ struct mlx5_dev_registers registers; struct mlx5_dev_shared_port port[]; /* per device port data array. */ }; diff --git a/drivers/net/mlx5/mlx5_hws_cnt.c b/drivers/net/mlx5/mlx5_hws_cnt.c index f556a9fbcc..a3bea94811 100644 --- a/drivers/net/mlx5/mlx5_hws_cnt.c +++ b/drivers/net/mlx5/mlx5_hws_cnt.c @@ -294,26 +294,25 @@ mlx5_hws_cnt_svc(void *opaque) (struct mlx5_dev_ctx_shared *)opaque; uint64_t interval = (uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S); - uint16_t port_id; + struct mlx5_hws_cnt_pool *hws_cpool; uint64_t start_cycle, query_cycle = 0; uint64_t query_us; uint64_t sleep_us; while (sh->cnt_svc->svc_running != 0) { + if (rte_spinlock_trylock(&sh->cpool_lock) == 0) + continue; start_cycle = rte_rdtsc(); - MLX5_ETH_FOREACH_DEV(port_id, sh->cdev->dev) { - struct mlx5_priv *opriv = - rte_eth_devices[port_id].data->dev_private; - if (opriv != NULL && - opriv->sh == sh && - opriv->hws_cpool != NULL) { - __mlx5_hws_cnt_svc(sh, opriv->hws_cpool); - if (opriv->hws_age_req) - mlx5_hws_aging_check(opriv, -opriv->hws_cpool); - } + /* 200ms for 16M counters. */ + LIST_FOREACH(hws_cpool, &sh->hws_cpool_list, next) { + struct mlx5_priv *opriv = hws_cpool->priv; + + __mlx5_hws_cnt_svc(sh, hws_cpool); + if (opriv->hws_age_req) + mlx5_hws_aging_check(opriv, hws_cpool); } query_cycle = rte_rdtsc() - start_cycle; + rte_spinlock_unlock(&sh->cpool_lock); query_us = query_cycle / (rte_get_timer_hz() / US_PER_S); sleep_us = interval - query_us; if (interval > query_us) @@ -665,6 +664,10 @@ mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev, if (ret != 0) goto error; priv->sh->cnt_svc->refcnt++; + cpool->priv = priv; + rte_spinlock_lock(&priv->sh->cpool_lock); + LIST_INSERT_HEAD(&priv->sh->hws_cpool_list, cpool, next); + rte_spinlock_unlock(&priv->sh->cpool_lock); return cpool; error: mlx5_hws_cnt_pool_destroy(priv->sh, cpool); @@ -677,6 +680,13 @@ mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh, { if (cpool == NULL) return; + /* +* 16M counter consumes 200ms to finish the query. +* Maybe blocked for at most 200ms here. +*/ + rte_spinlock_lock(&sh->cpool_lock); + LIST_REMOVE(cpool, next); + rte_spinlock_unlock(&sh->cpool_lock); if (cpool->cfg.host_cpool == NULL) { if (--sh->cnt_svc->refcnt == 0) mlx5_hws_cnt_svc_deinit(sh); @@ -1244,11 +1254,13 @@ mlx5_hws_age_pool_destroy(struct mlx5_priv *priv) { struct mlx5_age_inf
[PATCH] net/mlx5: fix job flow memory calculation
The upd_flow is the final object in the memory layout. This commit adjusts the new job memory start from upd_flow. Fixes: 63296851fadb ("net/mlx5: support flow rule update") Signed-off-by: Suanming Mou Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5_flow_hw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index deb0be05fb..d512889682 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -9088,8 +9088,7 @@ flow_hw_configure(struct rte_eth_dev *dev, &priv->hw_q[nb_q_updated]; else priv->hw_q[i].job = (struct mlx5_hw_q_job **) - &job[_queue_attr[i - 1]->size - 1].items -[MLX5_HW_MAX_ITEMS]; + &job[_queue_attr[i - 1]->size - 1].upd_flow[1]; job = (struct mlx5_hw_q_job *) &priv->hw_q[i].job[_queue_attr[i]->size]; mhdr_cmd = (struct mlx5_modification_cmd *) -- 2.34.1
RE: [PATCH v3 4/5] pcapng: avoid using alloca()
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Wednesday, 8 November 2023 19.36 > > The function alloca() like VLA's has problems if the caller > passes a large value. Instead use a fixed size buffer (4K) > which will be more than sufficient for the info related blocks > in the file. Add bounds checks as well. > > Signed-off-by: Stephen Hemminger > --- I can't find the definition of BUFSIZ. Please make sure to add a comment to the definition of BUFSIZ mentioning - like in your patch description - that it will be more than sufficient for the info related blocks in the file. More comments inline below, regarding existing bugs found while reviewing. Assuming BUFSIZ has a comment describing the reason for its value, Acked-by: Morten Brørup > lib/pcapng/rte_pcapng.c | 34 +- > 1 file changed, 13 insertions(+), 21 deletions(-) > > diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c > index 13fd2b97fb80..67f74d31aa32 100644 > --- a/lib/pcapng/rte_pcapng.c > +++ b/lib/pcapng/rte_pcapng.c > @@ -140,9 +140,8 @@ pcapng_section_block(rte_pcapng_t *self, > { > struct pcapng_section_header *hdr; > struct pcapng_option *opt; > - void *buf; > + uint8_t buf[BUFSIZ]; > uint32_t len; > - ssize_t cc; > > len = sizeof(*hdr); > if (hw) > @@ -158,8 +157,7 @@ pcapng_section_block(rte_pcapng_t *self, > len += pcapng_optlen(0); > len += sizeof(uint32_t); > > - buf = calloc(1, len); > - if (!buf) > + if (len > sizeof(buf)) > return -1; Existing bug: rte_errno must be set before returning -1. This bug occurs multiple times in rte_pcapng.c, probably also in code you're not updating in this patch. > > hdr = (struct pcapng_section_header *)buf; > @@ -193,10 +191,7 @@ pcapng_section_block(rte_pcapng_t *self, > /* clone block_length after option */ > memcpy(opt, &hdr->block_length, sizeof(uint32_t)); > > - cc = write(self->outfd, buf, len); > - free(buf); > - > - return cc; > + return write(self->outfd, buf, len); Existing bug: if write() returns -1, errno must be stored in rte_errno before returning -1. This bug might also occur multiple times in rte_pcapng.c.
RE: release candidate 23.11-rc2
> -Original Message- > From: Thomas Monjalon > Sent: Tuesday, November 7, 2023 6:22 AM > To: annou...@dpdk.org > Subject: release candidate 23.11-rc2 > > A new DPDK release candidate is ready for testing: >https://git.dpdk.org/dpdk/tag/?id=v23.11-rc2 > > There are 320 new patches in this snapshot. > > Release notes: >https://doc.dpdk.org/guides/rel_notes/release_23_11.html > > Highlights of 23.11-rc2: >- RSS algorithm management >- maximum Rx buffer size >- nfp vDPA driver >- graph application > > There were a lot of updates in drivers. > The driver features should be frozen now. > > Please test and report issues on bugs.dpdk.org. > > DPDK 23.11-rc3 is expected in approximately one week. > > Thank you everyone > Update the test status for Intel part. Till now dpdk23.11-rc2 test execution rate is 60%. found five new issues. New issues: 1. ice dcf get vf resource failed when launch testpmd. -> Intel dev is under investigating 2. multiprocess_iavf/test_multiprocess_symmetric_mp_packet: Failed to start two processes. -> Intel dev is under investigating 3. https://bugs.dpdk.org/show_bug.cgi?id=1308 meson_tests/driver: link_bonding_rssconf_autotest test failed. -> not fix yet 4. DPDK 23.11-RC2 TSO Tests failing. -> Intel dev is under investigating 5. mev_cpfl_rte_flow/test_mac_ipv4_udp_encap_vxlan_io_to_vf: received incorrect packet. -> has fix patch # Basic Intel(R) NIC testing * Build or compile: *Build: cover the build test combination with latest GCC/Clang version and the popular OS revision such as Ubuntu20.04.5, Ubuntu22.04.3, Fedora38, RHEL8.7/9.2, Centos7.9, OpenAnolis8.8, CBL-Mariner2.0 etc. - All test passed. *Compile: cover the CFLAGES(O0/O1/O2/O3) with popular OS such as Ubuntu22.04.2 and RHEL9.0. - All test passed with latest dpdk. * PF/VF(i40e, ixgbe, igc): test scenarios including PF/VF-RTE_FLOW/TSO/Jumboframe/checksum offload/VLAN/VXLAN, etc. - Execution rate is 90%. found 3 issues. * PF/VF(ice): test scenarios including Switch features/Package Management/Flow Director/Advanced Tx/Advanced RSS/ACL/DCF/Flexible Descriptor, etc. - Execution rate is 70%. found 1&2&3 issues. * MEV test: test scenarios including PF/TSO/MTU/Jumboframe/checksum offload, etc - Execution rate is 90%. found 5 issue. * Intel NIC single core/NIC performance: test scenarios including PF/VF single core performance test, RFC2544 Zero packet loss performance test, etc. - Execution rate is 70%. No new issue is found. * Power and IPsec: * Power: test scenarios including bi-direction/Telemetry/Empty Poll Lib/Priority Base Frequency, etc. - Execution rate is 60%. No new issue is found. * IPsec: test scenarios including ipsec/ipsec-gw/ipsec library basic test - QAT&SW/FIB library, etc. - Execution rate is 50%. found 4 issue. # Basic cryptodev and virtio testing * Virtio: both function and performance test are covered. Such as PVP/Virtio_loopback/virtio-user loopback/virtio-net VM2VM perf testing/VMAWARE ESXI 8.0, etc. - Execution rate is 70%. No new issue is found. * Cryptodev: *Function test: test scenarios including Cryptodev API testing/CompressDev ISA-L/QAT/ZLIB PMD Testing/FIPS, etc. - on going. No new issue is found. *Performance test: test scenarios including Throughput Performance /Cryptodev Latency, etc. - on going. No new issue is found. Regards, Xu, Hailin
[PATCH v1 0/2] fix IPv6 routing extension mismatching
Fix potential mismatching when IPv6 routing extension item enabled. Rongwei Liu (2): net/mlx5/hws: disable IPv6 routing extension relaxed mode net/mlx5/hws: fix srv6 mismatching drivers/net/mlx5/hws/mlx5dr_definer.c | 24 drivers/net/mlx5/hws/mlx5dr_definer.h | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) -- 2.27.0
[PATCH v1 1/2] net/mlx5/hws: disable IPv6 routing extension relaxed mode
When relaxed mode is set, definer only programs the fields with mask in the pattern template. Assume a template like "ipv6_routing_ext ext_next_hdr mask 0xff ext_type mask 0xff / udp dst mask 0x" and rule like "ipv6_routing_ext ext_next_hdr spec 17 ext_next_hdr mask 0xff ext_type spec 4 ext_type mask 0xff / udp dst spec 100 dst mask 0x", there is a potential mis-matching. 1. User sends first packet as: Ether()/IPv6()/IPv6ExtHdrSegmentRouting()/UDP(sport=100,dport=200) Miss due to unexpected UDP dport. 2. User sends 2nd packet as: Ether()/IPv6()/UDP(sport=100,dport=100) Packet hit. Hardware cached the IPv6 routing extension information in the 1st packet and plus the UDP dport in the 2nd packet, it matches the rule by mistake. Signed-off-by: Rongwei Liu Cc: sta...@dpdk.org Reviewed-by: Erez Shitrit Acked-by: Suanming Mou --- drivers/net/mlx5/hws/mlx5dr_definer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index 7dffbfb9b9..c0ccde64e1 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -2143,6 +2143,9 @@ mlx5dr_definer_conv_item_ipv6_routing_ext(struct mlx5dr_definer_conv_data *cd, fc->tag_mask_set = &mlx5dr_definer_ones_set; DR_CALC_SET(fc, eth_l3, protocol_next_header, inner); } + } else { + rte_errno = ENOTSUP; + return rte_errno; } if (!m) -- 2.27.0
[PATCH v1 2/2] net/mlx5/hws: fix srv6 mismatching
When matching srv6 packets, the IPv6 protocol is set to 0x2b by srv6 callback in fc[IP_PROTO]. In the next layer, TCP/UDP callback reset the fc again to the corresponding values: 0x11/0x6. Signed-off-by: Rongwei Liu Fixes: 00e579166cc0 ("net/mlx5: support IPv6 routing extension matching") Cc: sta...@dpdk.org Reviewed-by: Erez Shitrit Acked-by: Suanming Mou --- drivers/net/mlx5/hws/mlx5dr_definer.c | 21 + drivers/net/mlx5/hws/mlx5dr_definer.h | 1 + 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index c0ccde64e1..0b60479406 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -1069,10 +1069,12 @@ mlx5dr_definer_conv_item_udp(struct mlx5dr_definer_conv_data *cd, /* Set match on L4 type UDP */ if (!cd->relaxed) { fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, inner)]; - fc->item_idx = item_idx; - fc->tag_set = &mlx5dr_definer_udp_protocol_set; - fc->tag_mask_set = &mlx5dr_definer_ones_set; - DR_CALC_SET(fc, eth_l2, l4_type_bwc, inner); + if (!fc->not_overwrite) { + fc->item_idx = item_idx; + fc->tag_set = &mlx5dr_definer_udp_protocol_set; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + DR_CALC_SET(fc, eth_l2, l4_type_bwc, inner); + } } if (!m) @@ -1115,10 +1117,12 @@ mlx5dr_definer_conv_item_tcp(struct mlx5dr_definer_conv_data *cd, /* Overwrite match on L4 type TCP */ if (!cd->relaxed) { fc = &cd->fc[DR_CALC_FNAME(IP_PROTOCOL, inner)]; - fc->item_idx = item_idx; - fc->tag_set = &mlx5dr_definer_tcp_protocol_set; - fc->tag_mask_set = &mlx5dr_definer_ones_set; - DR_CALC_SET(fc, eth_l2, l4_type_bwc, inner); + if (!fc->not_overwrite) { + fc->item_idx = item_idx; + fc->tag_set = &mlx5dr_definer_tcp_protocol_set; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + DR_CALC_SET(fc, eth_l2, l4_type_bwc, inner); + } } if (!m) @@ -2141,6 +2145,7 @@ mlx5dr_definer_conv_item_ipv6_routing_ext(struct mlx5dr_definer_conv_data *cd, fc->item_idx = item_idx; fc->tag_set = &mlx5dr_definer_ipv6_routing_hdr_set; fc->tag_mask_set = &mlx5dr_definer_ones_set; + fc->not_overwrite = 1; DR_CALC_SET(fc, eth_l3, protocol_next_header, inner); } } else { diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.h b/drivers/net/mlx5/hws/mlx5dr_definer.h index 791154a7dc..6f1c99e37a 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.h +++ b/drivers/net/mlx5/hws/mlx5dr_definer.h @@ -166,6 +166,7 @@ struct mlx5dr_definer_fc { int bit_off; uint32_t bit_mask; enum mlx5dr_definer_fname fname; + uint8_t not_overwrite; void (*tag_set)(struct mlx5dr_definer_fc *fc, const void *item_spec, uint8_t *tag); -- 2.27.0
[PATCH 0/2] net/mlx5: fix flow rules for external SQ
If representor matching was enabled (device argument repr_matching_en is equal to 1, default configuration), then during registration of external SQs, mlx5 PMD would not create control flow rules in NIC Tx domain. This caused an issue with packet metadata. If a packet sent on external SQ had packet metadata attached, then it would be lost when it would go from NIC Tx to FDB domain. Meanwhile, the external SQ flow rules should be managed individually and not be destroyed internally by PMD. This series fixes these two issues for the external SQ flow rules in rte_pmd_mlx5_external_sq_enable(). Dariusz Sosnowski (1): net/mlx5: fix missing flow rules for external SQ Suanming Mou (1): net/mlx5: fix destroying external representor matched flows drivers/net/mlx5/mlx5.h | 41 ++ drivers/net/mlx5/mlx5_flow.h| 6 +- drivers/net/mlx5/mlx5_flow_hw.c | 132 drivers/net/mlx5/mlx5_trigger.c | 4 +- drivers/net/mlx5/mlx5_txq.c | 12 ++- 5 files changed, 176 insertions(+), 19 deletions(-) -- 2.34.1
[PATCH 1/2] net/mlx5: fix missing flow rules for external SQ
From: Dariusz Sosnowski mlx5 PMD exposes a capability to register externally created SQs as if it was an SQ of a given representor port. Registration would cause a creation of control flow rules in FDB domain used to forward traffic betwen SQ and destination represented port. Before this patch, if representor matching was enabled (device argument repr_matching_en is equal to 1, default configuration), then during registration of external SQs, mlx5 PMD would not create control flow rules in NIC Tx domain. This caused an issue with packet metadata. If a packet sent on external SQ had packet metadata attached, then it would be lost when it would go from NIC Tx to FDB domain. With representor matching disabled everything is working correctly, because in that mode there is a single global flow rule for preserving packet metadata. This flow rule matches whole traffic on NIC Tx domain. With representor matching enabled, NIC Tx flow rules are created per SQ. This patch fixes that behavior. If representor matching is enabled, then NIC Tx flow rules are created for each external SQ registered in rte_pmd_mlx5_external_sq_enable(). This patch also adds an ability to destroy SQ miss flow rules for a given port and SQ number. This is required for error rollback flow in rte_pmd_mlx5_external_sq_enable(). Fixes: 26e1eaf2dac4 ("net/mlx5: support device control for E-Switch default rule") Cc: sta...@dpdk.org Signed-off-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5.h | 40 drivers/net/mlx5/mlx5_flow.h| 2 + drivers/net/mlx5/mlx5_flow_hw.c | 107 +--- drivers/net/mlx5/mlx5_txq.c | 12 +++- 4 files changed, 149 insertions(+), 12 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index f5eacb2c67..45ad0701f1 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1705,10 +1705,50 @@ struct mlx5_obj_ops { #define MLX5_RSS_HASH_FIELDS_LEN RTE_DIM(mlx5_rss_hash_fields) +enum mlx5_hw_ctrl_flow_type { + MLX5_HW_CTRL_FLOW_TYPE_GENERAL, + MLX5_HW_CTRL_FLOW_TYPE_SQ_MISS_ROOT, + MLX5_HW_CTRL_FLOW_TYPE_SQ_MISS, + MLX5_HW_CTRL_FLOW_TYPE_DEFAULT_JUMP, + MLX5_HW_CTRL_FLOW_TYPE_TX_META_COPY, + MLX5_HW_CTRL_FLOW_TYPE_TX_REPR_MATCH, + MLX5_HW_CTRL_FLOW_TYPE_LACP_RX, + MLX5_HW_CTRL_FLOW_TYPE_DEFAULT_RX_RSS, +}; + +/** Additional info about control flow rule. */ +struct mlx5_hw_ctrl_flow_info { + /** Determines the kind of control flow rule. */ + enum mlx5_hw_ctrl_flow_type type; + union { + /** +* If control flow is a SQ miss flow (root or not), +* then fields contains matching SQ number. +*/ + uint32_t esw_mgr_sq; + /** +* If control flow is a Tx representor matching, +* then fields contains matching SQ number. +*/ + uint32_t tx_repr_sq; + }; +}; + +/** Entry for tracking control flow rules in HWS. */ struct mlx5_hw_ctrl_flow { LIST_ENTRY(mlx5_hw_ctrl_flow) next; + /** +* Owner device is a port on behalf of which flow rule was created. +* +* It's different from the port which really created the flow rule +* if and only if flow rule is created on transfer proxy port +* on behalf of representor port. +*/ struct rte_eth_dev *owner_dev; + /** Pointer to flow rule handle. */ struct rte_flow *flow; + /** Additional information about the control flow rule. */ + struct mlx5_hw_ctrl_flow_info info; }; /* diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 094be12715..d57b3b5465 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -2875,6 +2875,8 @@ int mlx5_flow_hw_flush_ctrl_flows(struct rte_eth_dev *dev); int mlx5_flow_hw_esw_create_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn); +int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, + uint32_t sqn); int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev); int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev); int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn); diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index f57126e2ff..d512889682 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -11341,6 +11341,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = { * Pointer to flow rule actions. * @param action_template_idx * Index of an action template associated with @p table. + * @param info + * Additional info about control flow rule. * * @return * 0 on success, negative errno value otherwise and rte_errno set. @@ -11352,7 +11354,8 @@ flow_hw_create_ctrl
[PATCH 2/2] net/mlx5: fix destroying external representor matched flows
The external representor matched SQ flows are managed by external SQ, PMD traffic enable/disable should not touch these flows. This commit adds an extra external list for the external representor matched SQ flows. Fixes: 26e1eaf2dac4 ("net/mlx5: support device control for E-Switch default rule") Cc: sta...@dpdk.org Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_flow.h| 4 +-- drivers/net/mlx5/mlx5_flow_hw.c | 45 +++-- drivers/net/mlx5/mlx5_trigger.c | 4 +-- drivers/net/mlx5/mlx5_txq.c | 4 +-- 5 files changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 45ad0701f1..795748eddc 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1855,6 +1855,7 @@ struct mlx5_priv { void *root_drop_action; /* Pointer to root drop action. */ rte_spinlock_t hw_ctrl_lock; LIST_HEAD(hw_ctrl_flow, mlx5_hw_ctrl_flow) hw_ctrl_flows; + LIST_HEAD(hw_ext_ctrl_flow, mlx5_hw_ctrl_flow) hw_ext_ctrl_flows; struct rte_flow_template_table *hw_esw_sq_miss_root_tbl; struct rte_flow_template_table *hw_esw_sq_miss_tbl; struct rte_flow_template_table *hw_esw_zero_tbl; diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index d57b3b5465..8c0b9a4b60 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -2874,12 +2874,12 @@ int flow_null_counter_query(struct rte_eth_dev *dev, int mlx5_flow_hw_flush_ctrl_flows(struct rte_eth_dev *dev); int mlx5_flow_hw_esw_create_sq_miss_flow(struct rte_eth_dev *dev, -uint32_t sqn); +uint32_t sqn, bool external); int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn); int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev); int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev); -int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn); +int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); int mlx5_flow_actions_validate(struct rte_eth_dev *dev, const struct rte_flow_actions_template_attr *attr, const struct rte_flow_action actions[], diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index d512889682..8a23c7c281 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -9189,6 +9189,7 @@ flow_hw_configure(struct rte_eth_dev *dev, priv->nb_queue = nb_q_updated; rte_spinlock_init(&priv->hw_ctrl_lock); LIST_INIT(&priv->hw_ctrl_flows); + LIST_INIT(&priv->hw_ext_ctrl_flows); ret = flow_hw_create_ctrl_rx_tables(dev); if (ret) { rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -11343,6 +11344,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = { * Index of an action template associated with @p table. * @param info * Additional info about control flow rule. + * @param external + * External ctrl flow. * * @return * 0 on success, negative errno value otherwise and rte_errno set. @@ -11355,7 +11358,8 @@ flow_hw_create_ctrl_flow(struct rte_eth_dev *owner_dev, uint8_t item_template_idx, struct rte_flow_action actions[], uint8_t action_template_idx, -struct mlx5_hw_ctrl_flow_info *info) +struct mlx5_hw_ctrl_flow_info *info, +bool external) { struct mlx5_priv *priv = proxy_dev->data->dev_private; uint32_t queue = CTRL_QUEUE_ID(priv); @@ -11406,7 +11410,10 @@ flow_hw_create_ctrl_flow(struct rte_eth_dev *owner_dev, entry->info = *info; else entry->info.type = MLX5_HW_CTRL_FLOW_TYPE_GENERAL; - LIST_INSERT_HEAD(&priv->hw_ctrl_flows, entry, next); + if (external) + LIST_INSERT_HEAD(&priv->hw_ext_ctrl_flows, entry, next); + else + LIST_INSERT_HEAD(&priv->hw_ctrl_flows, entry, next); rte_spinlock_unlock(&priv->hw_ctrl_lock); return 0; error: @@ -11580,11 +11587,23 @@ flow_hw_flush_all_ctrl_flows(struct rte_eth_dev *dev) mlx5_free(cf); cf = cf_next; } + cf = LIST_FIRST(&priv->hw_ext_ctrl_flows); + while (cf != NULL) { + cf_next = LIST_NEXT(cf, next); + ret = flow_hw_destroy_ctrl_flow(dev, cf->flow); + if (ret) { + rte_errno = ret; + return -ret; + } + LIST_REMOVE(cf, next); + mlx5_free(cf); + cf = cf_next; + } return 0; } int -mlx5_flow_hw_esw_crea
[Bug 1311] [dpdk-23.11] vhost_virtio_user_interrupt_with_power_monitor: Launch dpdk-l3fwd-power core dumped on SPR
https://bugs.dpdk.org/show_bug.cgi?id=1311 Bug ID: 1311 Summary: [dpdk-23.11] vhost_virtio_user_interrupt_with_power_monitor: Launch dpdk-l3fwd-power core dumped on SPR Product: DPDK Version: 23.11 Hardware: All OS: All Status: UNCONFIRMED Severity: normal Priority: Normal Component: vhost/virtio Assignee: dev@dpdk.org Reporter: linglix.c...@intel.com Target Milestone: --- [Environment] DPDK version: dpdk 23.11-rc1: 5f9426b061 OS: Ubuntu 22.04.3 LTS/5.15.0-84-generic Compiler: gcc version 11.4.0 Hardware platform: SPR(Intel(R) Xeon(R) Platinum 8490H) NIC hardware: Ethernet Controller E810-C for QSFP 1592 NIC firmware: 4.40 0x8001c2f1 1.3492.0 NIC kdriver: ice-1.13.0_rc99_4_gb8ad35697 [Test Setup] Steps to reproduce 1. ssh root@dpdk-yingya-spr2 2.bind NIC port to vfio-pci, ./usertools/dpdk-devbind.py -b vfio-pci 45:00.0 3. Launch vhost by below command:: ./x86_64-native-linuxapp-gcc/app/dpdk-testpmd -l 1-2 -n 4 --file-prefix=vhost --vdev 'net_vhost0,iface=./vhost-net,queues=1' -- -i --rxq=1 --txq=1 4. Launch virtio with l3fwd-power:: ./x86_64-native-linuxapp-gcc/examples/dpdk-l3fwd-power -l 3-4 -n 4 --no-pci --file-prefix=l3fwd-power --vdev=virtio_user0,path=./vhost-net --log-level='user1,7' -- -p 1 --config="(0,0,4)" --parse-ptype --pmd-mgmt=monitor [Show the output from the previous commands] EAL: Detected CPU lcores: 128 EAL: Detected NUMA nodes: 2 EAL: Detected static linkage of DPDK EAL: Multi-process socket /var/run/dpdk/l3fwd-power/mp_socket EAL: Selected IOVA mode 'VA' EAL: VFIO support initialized virtio_user_dev_init_mac(): (./vhost-net) No valid MAC in devargs or device, use random soft parse-ptype is enabled PMD power mgmt mode is enabled L3FWD_POWER: Selected operation mode: pmd mgmt Initializing port 0 ... Creating queues: nb_rxq=1 nb_txq=1... Port 0 modified RSS hash function based on hardware support,requested:0x20820 configured:0 Address:06:8F:52:89:DE:C0, Allocated mbuf pool on socket 0 LPM: Adding route 0x01010100 / 24 (0) LPM: Adding route 0x02010100 / 24 (1) LPM: Adding route 0x03010100 / 24 (2) LPM: Adding route 0x04010100 / 24 (3) LPM: Adding route 0x05010100 / 24 (4) LPM: Adding route 0x06010100 / 24 (5) LPM: Adding route 0x07010100 / 24 (6) LPM: Adding route 0x08010100 / 24 (7) txq=3,0,0 Initializing rx queues on lcore 3 ... Initializing rx queues on lcore 4 ... rxq=0,0,0 Port 0: softly parse packet type info Checking link statusdone Port 0 Link up at Unknown FDX Autoneg L3FWD_POWER: entering main telemetry loop on lcore 4 L3FWD_POWER: -- lcoreid=4 portid=0 rxqueueid=0 L3FWD_POWER: lcore 3 has nothing to do Segmentation fault (core dumped) [Expected Result] Launch dpdk-l3fwd-power normal. Regression Is this issue a regression: (Y/N) Y First bad commit 60943c04f3bcd0ed32a9844993d1182c917b978b Author: Tyler Retzlaff Date: Fri Aug 11 12:20:47 2023 -0700 eal/x86: use intrinsics for power management Inline assembly is not supported for MSVC x64 instead use _umonitor, _umwait and _tpause intrinsics. Signed-off-by: Tyler Retzlaff Acked-by: Morten Brørup Acked-by: Konstantin Ananyev test on SPR only The bad commit 60943c04f3 will build failed, fixed commit ac1d4db91e (eal/x86: fix build on systems with WAITPKG) -- You are receiving this mail because: You are the assignee for the bug.
Re: [PATCH v1 1/1] baseband/acc: fix for TB mode on VRB1
On 11/4/23 00:45, Nicolas Chautru wrote: The input size is computed incorrectly for the LDPC encoder TB processing. Fixes: e640f6cdfa84 ("baseband/acc200: add LDPC processing") Cc: sta...@dpdk.org Signed-off-by: Nicolas Chautru --- drivers/baseband/acc/rte_vrb_pmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c index ae230b828a..686e086a5c 100644 --- a/drivers/baseband/acc/rte_vrb_pmd.c +++ b/drivers/baseband/acc/rte_vrb_pmd.c @@ -2218,7 +2218,8 @@ vrb1_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op uint16_t init_enq_descs = enq_descs; uint32_t in_offset = 0, out_offset = 0; - input_len_B = ((op->ldpc_enc.basegraph == 1 ? 22 : 10) * op->ldpc_enc.z_c) >> 3; + input_len_B = ((op->ldpc_enc.basegraph == 1 ? 22 : 10) * op->ldpc_enc.z_c + - op->ldpc_enc.n_filler) >> 3; if (check_bit(op->ldpc_enc.op_flags, RTE_BBDEV_LDPC_CRC_24B_ATTACH)) input_len_B -= 3; Reviewed-by: Maxime Coquelin Thanks, Maxime
DPDK Release Status Meeting 2023-11-09
Release status meeting minutes 2023-11-09 = Agenda: * Release Dates * Subtrees * Roadmaps * LTS * Defects * Opens Participants: * AMD * ARM * Debian/Microsoft * Intel * Marvell * Nvidia * Red Hat Release Dates - The following are the current working dates for 23.11: * V1: 12 August 2023 * RC1: 11 October 2023 * RC2: 6 November 2023 * RC3: 13 November 2023 * RC4: 20 November 2023 * Release: 22 November 2023 Subtrees * next-net * RC2 released * Working on bugs/fixes. * next-net-intel * QA testing ongoing. * Some small fixes for post RC2. * next-net-mlx * No update. * next-net-mvl * QA testing ongoing. * Some small fixes to apply. * next-eventdev * QA testing ongoing. * Some small fixes to apply. * next-baseband * PR being prepared. * next-virtio * No update. * next-crypto * 8-9 patches for RC3 * Coccinelle script for Null checks. * Preparing PR. * main * RC2 released November 6th. * Provisional dates for the next phases of the release: * RC3: 13 November 2023 * RC4: 20 November 2023 * Release: 22 November 2023 Proposed Schedule for 2023 -- See notes above and http://core.dpdk.org/roadmap/#dates LTS --- LTS in the current cycle are in progress. * 22.11.3 - In progress * 21.11.6 - In progress * 20.11.10 - In progress * 19.11.15 * Will only be updated with CVE and critical fixes. * Distros * Debian 12 contains DPDK v22.11 * Ubuntu 22.04-LTS contains DPDK v21.11 * Ubuntu 23.04 contains DPDK v22.11 Defects --- * Bugzilla links, 'Bugs', added for hosted projects * https://www.dpdk.org/hosted-projects/ DPDK Release Status Meetings The DPDK Release Status Meeting is intended for DPDK Committers to discuss the status of the master tree and sub-trees, and for project managers to track progress or milestone dates. The meeting occurs on every Thursday at 9:30 UTC over Jitsi on https://meet.jit.si/DPDK You don't need an invite to join the meeting but if you want a calendar reminder just send an email to "John McNamara john.mcnam...@intel.com" for the invite.
[PATCH 2/2] app/test: fix uninitialized RSS configuration
Since RSS algorithm has been supported, and is checked in ethdev layer, it is better to initialize "struct rte_eth_rss_conf" before congiuring RSS. Otherwise, an error will occur. Fixes: 43b630244e7e ("app/test: add dynamic bonding RSS configuration") Fixes: bae3cfa520a7 ("ethdev: clarify RSS related fields usage") Cc: sta...@dpdk.org Signed-off-by: Jie Hai --- app/test/test_link_bonding_rssconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/test_link_bonding_rssconf.c b/app/test/test_link_bonding_rssconf.c index cd94e9e5dced..3c9c82433507 100644 --- a/app/test/test_link_bonding_rssconf.c +++ b/app/test/test_link_bonding_rssconf.c @@ -324,7 +324,7 @@ test_propagate(void) uint8_t n; struct member_conf *port; uint8_t bond_rss_key[40]; - struct rte_eth_rss_conf bond_rss_conf; + struct rte_eth_rss_conf bond_rss_conf = {0}; int retval = 0; uint64_t rss_hf = 0; -- 2.30.0
[PATCH 0/2] app/test: fix unit test fail on RSS
This patch fixes some bugs on test_link_bonding_rssconf. Jie Hai (2): net/null: fix unit test fail on RSS app/test: fix uninitialized RSS configuration app/test/test_link_bonding_rssconf.c | 2 +- drivers/net/null/rte_eth_null.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) -- 2.30.0
[PATCH 1/2] net/null: fix unit test fail on RSS
The ethdev uses "dev_info->hash_key_size" to check RSS configuration. So drivers should report the correct info, This patch fixes it. For more details: https://bugs.dpdk.org/show_bug.cgi?id=1308 Fixes: bae3cfa520a7 ("ethdev: clarify RSS related fields usage") Cc: sta...@dpdk.org Signed-off-by: Jie Hai --- drivers/net/null/rte_eth_null.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c index d742bc415c8c..7c46004f1e33 100644 --- a/drivers/net/null/rte_eth_null.c +++ b/drivers/net/null/rte_eth_null.c @@ -316,6 +316,7 @@ eth_dev_info(struct rte_eth_dev *dev, dev_info->min_rx_bufsize = 0; dev_info->reta_size = internals->reta_size; dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; + dev_info->hash_key_size = sizeof(internals->rss_key); return 0; } -- 2.30.0
Re: [PATCH v3 10/10] test/bbdev: update python script parameters
On 11/4/23 00:34, Nicolas Chautru wrote: +# This will become -t option for iter_max in next release +parser.add_argument("-x", "--iter-max", +type=int, +help="Max iterations", +default=6) No need for "-x", it is possible to only have the long name: parser.add_argument("--iter-max", type=int, help="Max iterations", default=6) If that works for you, I can do the change while applying. Regards, Maxime
Re: [PATCH v3 2/2] doc: update FlexRAN SDK links
On 10/28/23 00:03, Nicolas Chautru wrote: From: Hernan Vargas Update FlexRAN SDK module link to use FEC_SDK_23.07. Update compiler links to use ICX. Update build SDK build instructions. Signed-off-by: Hernan Vargas --- doc/guides/bbdevs/turbo_sw.rst | 53 +++--- 1 file changed, 30 insertions(+), 23 deletions(-) Reviewed-by: Maxime Coquelin Thanks, Maxime
RFC: default burst sizes in rte_config
> From: Morten Brørup [mailto:m...@smartsharesystems.com] > Sent: Wednesday, 8 November 2023 18.49 > > > From: Stephen Hemminger [mailto:step...@networkplumber.org] > > Sent: Wednesday, 8 November 2023 17.52 > > [...] > > > > Would it make sense to have an rte_config.h value for maximum burst > > size? > > I would support that! It would also be a good place to document the reasoning behind the choice of burst size, so application developers can better understand how to fine tune the values according to available hardware and application specific requirements. Those build-time configurable values should also be used by DPDK libraries, instead of more or less randomly chosen hardcoded burst sizes. E.g. when I implemented rte_pktmbuf_free_bulk(), I considered 64 plenty of burst capacity, because it was double the size of the traditional burst size of 32. But it is probably sub-optimal for applications using a default burst size of 128. > There could be a few burst size defines, e.g. > > - SMALL: used for small bursts (I think some drivers use bursts of 8) The reason for choosing 8 is probably rooted in cache alignment: Eight 64-bit pointers covers one cache line. I wonder if those drivers would perform better using bursts of 16 mbufs on 32-bit architectures, or on 64-bit architectures with 128 B cache line size? > - NORMAL: used for typical bursts This is usually a balance between latency and throughput: Using shorter bursts can reduce the latency (if the application is designed with this in mind). Using larger bursts improves processing performance, and thus increases throughput. There is also some upper limit: If the burst is too large, the amount of memory touched by a pipeline stage might not fit into the CPU data cache size, and performance drops like a rock. E.g. a CPU with 64 B cache line size and 32 KB L1 data cache per lcore can hold 512 cache lines in its L1 data cache, so a burst of 32 mbufs allows touching an average of 512/32 = 16 cache lines per packet. The mbuf structure itself uses 2 cache lines, so the max theoretical burst would be 512/2 = 256 if no other memory was touched. However, the array holding the mbuf pointers is also touched, so I would put 128 as the largest good burst size on such a CPU. > - LARGE: used for large bursts, e.g. mempool cache flush If kept at 512, like the magnitude of the mempool cache flushes/refills, it should only be used for moving mbuf pointers around, without touching the mbufs themselves, or the CPU's L1 data cache will overflow. > > Having these available at build time would also allow more > optimizations in DPDK libs and drivers for those specific burst sizes.
[RESEND v7 2/3] ring: add telemetry cmd to list rings
Add a telemetry command to list the rings used in the system. An example using this command is shown below: --> /ring/list { "/ring/list": [ "HT_:7d:00.2", "MP_mb_pool_0" ] } Signed-off-by: Jie Hai Acked-by: Konstantin Ananyev Reviewed-by: Honnappa Nagarahalli Acked-by: Huisong Li Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/meson.build | 1 + lib/ring/rte_ring.c | 40 2 files changed, 41 insertions(+) diff --git a/lib/ring/meson.build b/lib/ring/meson.build index c20685c689ac..7fca958ed7fa 100644 --- a/lib/ring/meson.build +++ b/lib/ring/meson.build @@ -18,3 +18,4 @@ indirect_headers += files ( 'rte_ring_rts.h', 'rte_ring_rts_elem_pvt.h', ) +deps += ['telemetry'] diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c index 057d25ff6f2f..6a10280fb093 100644 --- a/lib/ring/rte_ring.c +++ b/lib/ring/rte_ring.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "rte_ring.h" #include "rte_ring_elem.h" @@ -418,3 +419,42 @@ rte_ring_lookup(const char *name) return r; } + +static void +ring_walk(void (*func)(struct rte_ring *, void *), void *arg) +{ + struct rte_ring_list *ring_list; + struct rte_tailq_entry *tailq_entry; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + rte_mcfg_tailq_read_lock(); + + TAILQ_FOREACH(tailq_entry, ring_list, next) { + (*func)((struct rte_ring *) tailq_entry->data, arg); + } + + rte_mcfg_tailq_read_unlock(); +} + +static void +ring_list_cb(struct rte_ring *r, void *arg) +{ + struct rte_tel_data *d = (struct rte_tel_data *)arg; + + rte_tel_data_add_array_string(d, r->name); +} + +static int +ring_handle_list(const char *cmd __rte_unused, + const char *params __rte_unused, struct rte_tel_data *d) +{ + rte_tel_data_start_array(d, RTE_TEL_STRING_VAL); + ring_walk(ring_list_cb, d); + return 0; +} + +RTE_INIT(ring_init_telemetry) +{ + rte_telemetry_register_cmd("/ring/list", ring_handle_list, + "Returns list of available rings. Takes no parameters"); +} -- 2.30.0
[RESEND v7 3/3] ring: add telemetry cmd for ring info
This patch supports dump of ring information by its name. An example using this command is shown below: --> /ring/info,MP_mb_pool_0 { "/ring/info": { "name": "MP_mb_pool_0", "socket": 0, "flags": "0x0", "producer_type": "MP", "consumer_type": "MC", "size": 262144, "mask": "0x3", "capacity": 262143, "used_count": 153197, "mz_name": "RG_MP_mb_pool_0", "mz_len": 2097536, "mz_hugepage_sz": 1073741824, "mz_socket_id": 0, "mz_flags": "0x0" } } Signed-off-by: Jie Hai Reviewed-by: Honnappa Nagarahalli Acked-by: Konstantin Ananyev Acked-by: Huisong Li Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/rte_ring.c | 95 + 1 file changed, 95 insertions(+) diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c index 6a10280fb093..5ee15037548e 100644 --- a/lib/ring/rte_ring.c +++ b/lib/ring/rte_ring.c @@ -453,8 +453,103 @@ ring_handle_list(const char *cmd __rte_unused, return 0; } +static const char * +ring_prod_sync_type_to_name(struct rte_ring *r) +{ + switch (r->prod.sync_type) { + case RTE_RING_SYNC_MT: + return "MP"; + case RTE_RING_SYNC_ST: + return "SP"; + case RTE_RING_SYNC_MT_RTS: + return "MP_RTS"; + case RTE_RING_SYNC_MT_HTS: + return "MP_HTS"; + default: + return "Unknown"; + } +} + +static const char * +ring_cons_sync_type_to_name(struct rte_ring *r) +{ + switch (r->cons.sync_type) { + case RTE_RING_SYNC_MT: + return "MC"; + case RTE_RING_SYNC_ST: + return "SC"; + case RTE_RING_SYNC_MT_RTS: + return "MC_RTS"; + case RTE_RING_SYNC_MT_HTS: + return "MC_HTS"; + default: + return "Unknown"; + } +} + +struct ring_info_cb_arg { + char *ring_name; + struct rte_tel_data *d; +}; + +static void +ring_info_cb(struct rte_ring *r, void *arg) +{ + struct ring_info_cb_arg *ring_arg = (struct ring_info_cb_arg *)arg; + struct rte_tel_data *d = ring_arg->d; + const struct rte_memzone *mz; + + if (strncmp(r->name, ring_arg->ring_name, RTE_RING_NAMESIZE)) + return; + + rte_tel_data_add_dict_string(d, "name", r->name); + rte_tel_data_add_dict_int(d, "socket", r->memzone->socket_id); + rte_tel_data_add_dict_uint_hex(d, "flags", r->flags, 0); + rte_tel_data_add_dict_string(d, "producer_type", + ring_prod_sync_type_to_name(r)); + rte_tel_data_add_dict_string(d, "consumer_type", + ring_cons_sync_type_to_name(r)); + rte_tel_data_add_dict_uint(d, "size", r->size); + rte_tel_data_add_dict_uint_hex(d, "mask", r->mask, 0); + rte_tel_data_add_dict_uint(d, "capacity", r->capacity); + rte_tel_data_add_dict_uint(d, "used_count", rte_ring_count(r)); + + mz = r->memzone; + if (mz == NULL) + return; + rte_tel_data_add_dict_string(d, "mz_name", mz->name); + rte_tel_data_add_dict_uint(d, "mz_len", mz->len); + rte_tel_data_add_dict_uint(d, "mz_hugepage_sz", mz->hugepage_sz); + rte_tel_data_add_dict_int(d, "mz_socket_id", mz->socket_id); + rte_tel_data_add_dict_uint_hex(d, "mz_flags", mz->flags, 0); +} + +static int +ring_handle_info(const char *cmd __rte_unused, const char *params, + struct rte_tel_data *d) +{ + char name[RTE_RING_NAMESIZE] = {0}; + struct ring_info_cb_arg ring_arg; + + if (params == NULL || strlen(params) == 0 || + strlen(params) >= RTE_RING_NAMESIZE) + return -EINVAL; + + rte_strlcpy(name, params, RTE_RING_NAMESIZE); + + ring_arg.ring_name = name; + ring_arg.d = d; + + rte_tel_data_start_dict(d); + ring_walk(ring_info_cb, &ring_arg); + + return 0; +} + RTE_INIT(ring_init_telemetry) { rte_telemetry_register_cmd("/ring/list", ring_handle_list, "Returns list of available rings. Takes no parameters"); + rte_telemetry_register_cmd("/ring/info", ring_handle_info, + "Returns ring info. Parameters: ring_name."); } -- 2.30.0
[RESEND v7 1/3] ring: fix unmatched type definition and usage
Field 'flags' of struct rte_ring is defined as int type. However, it is used as unsigned int. To ensure consistency, change the type of flags to unsigned int. Since these two types has the same byte size, this change is not an ABI change. Fixes: af75078fece3 ("first public release") Signed-off-by: Jie Hai Acked-by: Konstantin Ananyev Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/rte_ring_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h index b7708730658a..14dac6495d83 100644 --- a/lib/ring/rte_ring_core.h +++ b/lib/ring/rte_ring_core.h @@ -119,7 +119,7 @@ struct rte_ring_hts_headtail { struct rte_ring { char name[RTE_RING_NAMESIZE] __rte_cache_aligned; /**< Name of the ring. */ - int flags; /**< Flags supplied at creation. */ + uint32_t flags; /**< Flags supplied at creation. */ const struct rte_memzone *memzone; /**< Memzone, if any, containing the rte_ring */ uint32_t size; /**< Size of ring. */ -- 2.30.0
[RESEND v7 0/3] add telemetry cmds for ring
This patch set supports telemetry cmd to list rings and dump information of a ring by its name. v1->v2: 1. Add space after "switch". 2. Fix wrong strlen parameter. v2->v3: 1. Remove prefix "rte_" for static function. 2. Add Acked-by Konstantin Ananyev for PATCH 1. 3. Introduce functions to return strings instead copy strings. 4. Check pointer to memzone of ring. 5. Remove redundant variable. 6. Hold lock when access ring data. v3->v4: 1. Update changelog according to reviews of Honnappa Nagarahalli. 2. Add Reviewed-by Honnappa Nagarahalli. 3. Correct grammar in help information. 4. Correct spell warning on "te" reported by checkpatch.pl. 5. Use ring_walk() to query ring info instead of rte_ring_lookup(). 6. Fix that type definition the flag field of rte_ring does not match the usage. 7. Use rte_tel_data_add_dict_uint_hex instead of rte_tel_data_add_dict_u64 for mask and flags. v4->v5: 1. Add Acked-by Konstantin Ananyev and Chengwen Feng. 2. Add ABI change explanation for commit message of patch 1/3. v5->v6: 1. Add Acked-by Morten Br?rup. 2. Fix incorrect reference of commit. v6->v7: 1. Remove prod/consumer head/tail info. Jie Hai (3): ring: fix unmatched type definition and usage ring: add telemetry cmd to list rings ring: add telemetry cmd for ring info lib/ring/meson.build | 1 + lib/ring/rte_ring.c | 135 +++ lib/ring/rte_ring_core.h | 2 +- 3 files changed, 137 insertions(+), 1 deletion(-) -- 2.30.0
Next year of LTSes
Hi, We should start thinking about the next year of LTSes and how to organize. 20.11 will be EOL after the next upcoming release being prepared now, as it's now 3 years old. So long, and thanks for all the fixes. Currently Kevin is taking care of 21.11 and Xueming of 22.11. Xueming, what do you think about, from next year (ie, after the current set is done) me taking over 22.11, and you switching to 23.11? The reason is that Debian uses "even" releases due to release cadence, so just like I had vested interest for 20.11 for Debian 11, Debian 12 uses 22.11. Kind regards, Luca Boccassi
Re: [PATCH] event/dlb2: fix name check in selftest
On Wed, Nov 8, 2023 at 7:02 AM Sevincer, Abdullah wrote: > > Acked-by: Abdullah Sevincer Applied to dpdk-next-eventdev/for-main. Thanks
RE: [RFC] mempool: CPU cache aligning mempool driver accesses
+TO: Andrew, mempool maintainer > From: Morten Brørup [mailto:m...@smartsharesystems.com] > Sent: Monday, 6 November 2023 11.29 > > > From: Bruce Richardson [mailto:bruce.richard...@intel.com] > > Sent: Monday, 6 November 2023 10.45 > > > > On Sat, Nov 04, 2023 at 06:29:40PM +0100, Morten Brørup wrote: > > > I tried a little experiment, which gave a 25 % improvement in > mempool > > > perf tests for long bursts (n_get_bulk=32 n_put_bulk=32 n_keep=512 > > > constant_n=0) on a Xeon E5-2620 v4 based system. > > > > > > This is the concept: > > > > > > If all accesses to the mempool driver goes through the mempool > cache, > > > we can ensure that these bulk load/stores are always CPU cache > > aligned, > > > by using cache->size when loading/storing to the mempool driver. > > > > > > Furthermore, it is rumored that most applications use the default > > > mempool cache size, so if the driver tests for that specific value, > > > it can use rte_memcpy(src,dst,N) with N known at build time, > allowing > > > optimal performance for copying the array of objects. > > > > > > Unfortunately, I need to change the flush threshold from 1.5 to 2 > to > > > be able to always use cache->size when loading/storing to the > mempool > > > driver. > > > > > > What do you think? It's the concept of accessing the underlying mempool in entire cache lines I am seeking feedback for. The provided code is just an example, mainly for testing performance of the concept. > > > > > > PS: If we can't get rid of the mempool cache size threshold factor, > > > we really need to expose it through public APIs. A job for another > > day. The concept that a mempool per-lcore cache can hold more objects than its size is extremely weird, and certainly unexpected by any normal developer. And thus it is likely to cause runtime errors for applications designing tightly sized mempools. So, if we move forward with this RFC, I propose eliminating the threshold factor, so the mempool per-lcore caches cannot hold more objects than their size. When doing this, we might also choose to double RTE_MEMPOOL_CACHE_MAX_SIZE, to prevent any performance degradation. > > > > > > Signed-off-by: Morten Brørup > > > --- > > Interesting, thanks. > > > > Out of interest, is there any different in performance you observe if > > using > > regular libc memcpy vs rte_memcpy for the ring copies? Since the copy > > amount is constant, a regular memcpy call should be expanded by the > > compiler itself, and so should be pretty efficient. > > I ran some tests without patching rte_ring_elem_pvt.h, i.e. without > introducing the constant-size copy loop. I got the majority of the > performance gain at this point. > > At this point, both pointers are CPU cache aligned when refilling the > mempool cache, and the destination pointer is CPU cache aligned when > draining the mempool cache. > > In other words: When refilling the mempool cache, it is both loading > and storing entire CPU cache lines. And when draining, it is storing > entire CPU cache lines. > > > Adding the fixed-size copy loop provided an additional performance > gain. I didn't test other constant-size copy methods than rte_memcpy. > > rte_memcpy should have optimal conditions in this patch, because N is > known to be 512 * 8 = 4 KiB at build time. Furthermore, both pointers > are CPU cache aligned when refilling the mempool cache, and the > destination pointer is CPU cache aligned when draining the mempool > cache. I don't recall if pointer alignment matters for rte_memcpy, > though. > > The memcpy in libc (or more correctly: intrinsic to the compiler) will > do non-temporal copying for large sizes, and I don't know what that > threshold is, so I think rte_memcpy is the safe bet here. Especially if > someone builds DPDK with a larger mempool cache size than 512 objects. > > On the other hand, non-temporal access to the objects in the ring might > be beneficial if the ring is so large that they go cold before the > application loads them from the ring again.
Re: [PATCH v3 1/2] baseband/acc: support ACC100 deRM corner case SDK
On 10/28/23 00:03, Nicolas Chautru wrote: From: Hernan Vargas Implement de-ratematch pre-processing for ACC100 SW corner cases. Some specific 5GUL FEC corner cases may cause unintended back pressure and in some cases a potential stability issue on the ACC100. The PMD can detect such code block configuration and issue an info message to the user. Signed-off-by: Hernan Vargas Signed-off-by: Nicolas Chautru --- drivers/baseband/acc/meson.build | 21 ++ drivers/baseband/acc/rte_acc100_pmd.c | 59 +-- 2 files changed, 76 insertions(+), 4 deletions(-) Reviewed-by: Maxime Coquelin Thanks, Maxime
[PATCH] doc/rel_notes: add note on libarchive dependencies
Since DPDK now registers an explicit dependency on libarchive, rather than just putting -larchive in link args, we need to add a documentation note about potential missing dependencies when static linking. Signed-off-by: Bruce Richardson --- doc/guides/rel_notes/release_23_11.rst | 10 ++ 1 file changed, 10 insertions(+) diff --git a/doc/guides/rel_notes/release_23_11.rst b/doc/guides/rel_notes/release_23_11.rst index ff28083e1c..0b33665868 100644 --- a/doc/guides/rel_notes/release_23_11.rst +++ b/doc/guides/rel_notes/release_23_11.rst @@ -75,6 +75,16 @@ New Features which also added support for standard atomics (Ref: https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html) +* **Extra dependencies when linking against libarchive** + + When the libarchive development package is present on the system, + DPDK will use libarchive and register a dependency on it. + However, on a number of Linux distributions, including, for example, Fedora and Ubuntu, + installing the libarchive dev package does not cause all required dependencies for static linking to be automatically installed too. + These additional dev packages, such as ``liblz4-dev`` and ``libacl1-dev`` on Ubuntu, + will need to be installed manually (if not already present) + to prevent errors with linking against DPDK static libraries. + * **Added new build options.** * Enabling deprecated libraries is now done using -- 2.39.2
Re: [PATCH v4] app/test: append 'allow' parameters to secondary processes
On Wed, Sep 27, 2023 at 03:42:04AM +, Mingjin Ye wrote: > The 'allow' parameters are not passed to the secondary process, and all > devices will be loaded by default. When the primary process specifies > the 'allow' parameters and does not specify all devices that use the > vfio-pci driver, the secondary process will not load the devices as > expected, which will return incorrect test results. > > This patch fixes this issue by appending the 'allow' parameters to the > secondary process. > > Fixes: 086eb64db39e ("test/pdump: add unit test for pdump library") > Fixes: 148f963fb532 ("xen: core library changes") > Fixes: af75078fece3 ("first public release") > Fixes: b8d5e544e73e ("test: add procfs error message for multi-process > launch") > Cc: sta...@dpdk.org > > Signed-off-by: Mingjin Ye > --- > v4: Resolve patch conflicts and optimize code. No issue with the idea of this fix, some impovement suggestions inline below. Once addressed: Acked-by: Bruce Richardson > --- > app/test/process.h | 60 +++--- > 1 file changed, 57 insertions(+), 3 deletions(-) > > diff --git a/app/test/process.h b/app/test/process.h > index af7bc3e0de..1cdf28752e 100644 > --- a/app/test/process.h > +++ b/app/test/process.h > @@ -18,6 +18,8 @@ > > #include /* strlcpy */ > > +#include > + > #ifdef RTE_EXEC_ENV_FREEBSD > #define self "curproc" > #define exe "file" > @@ -34,6 +36,47 @@ extern uint16_t flag_for_send_pkts; > #endif > #endif > > +#define PREFIX_ALLOW "--allow" > + > +static int > +add_parameter_allow(char **argv, int max_capacity) > +{ > + struct rte_devargs *devargs; > + int count = 0; > + int name_length, data_length; > + char *dev; > + int malloc_size; > + > + RTE_EAL_DEVARGS_FOREACH(NULL, devargs) { > + if (count >= max_capacity) Since you need two slots for each element, you need to check that count+1 < max_capacity. However, a better solution is to have just one flag for each dev-args. So instead of returning an array with e.g. ["--allow", "a8:00.0", "--allow", "b8:00.0"] instead use the "--allow=" syntax to merge elements, returning: ["--allow=a8:00.0", "--allow=b8:00.0"] this would make your capacity check correct. > + return count; > + > + name_length = strlen(devargs->name); > + if (name_length == 0) > + continue; > + > + if (devargs->data != NULL) > + data_length = strlen(devargs->data); > + else > + data_length = 0; Minor suggestion. Rather than having an else leg here, just move the definitions of name_length and data_length inside the loop, so you can zero-initialize it each time. An even simpler option might be to just use a single malloc_size variable as you don't need the others: malloc_size = strlen(devargs->name) + 1; if (malloc_size == 0) continue; malloc_size += strlen(PREFIX_ALLOW); /* which now has the "=" */ if (devargs->data != NULL) malloc_size += strlen(devargs->data) > + > + malloc_size = name_length + data_length + 1; > + dev = malloc(malloc_size); > + Check for NULL return from malloc. > + memcpy(dev, devargs->name, name_length); > + if (data_length > 0) > + memcpy(dev + name_length, devargs->data, data_length); > + memset(dev + malloc_size - 1, 0, 1); > + > + *(argv + count) = strdup(PREFIX_ALLOW); > + count++; > + > + *(argv + count) = dev; > + count++; > + } > + return count; > +} > + > /* > * launches a second copy of the test process using the given argv > parameters, > * which should include argv[0] as the process name. To identify in the > @@ -44,7 +87,9 @@ static inline int > process_dup(const char *const argv[], int numargs, const char *env_value) > { > int num; > - char *argv_cpy[numargs + 1]; > + char **argv_cpy; > + unsigned int allow_num; > + unsigned int argv_num; > int i, status; > char path[32]; > #ifdef RTE_LIB_PDUMP > @@ -58,12 +103,15 @@ process_dup(const char *const argv[], int numargs, const > char *env_value) > if (pid < 0) > return -1; > else if (pid == 0) { > + allow_num = rte_devargs_type_count(RTE_DEVTYPE_ALLOWED); > + argv_num = numargs + allow_num * 2 + 1; > + argv_cpy = malloc(argv_num * sizeof(char *)); > /* make a copy of the arguments to be passed to exec */ > for (i = 0; i < numargs; i++) > argv_cpy[i] = strdup(argv[i]); > - argv_cpy[i] = NULL; > num = numargs; > - > + num += add_parameter_allow(&argv_cpy[i], allow_num * 2); If merging flags, you can remove the *2 here and in the argv_num = line. > + argv_cpy[argv_num
RE: [PATCH v4] net/iavf: fix mbuf release API selection
> -Original Message- > From: Kaiwen Deng > Sent: Thursday, November 9, 2023 12:59 PM > To: dev@dpdk.org > Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX > ; Deng, KaiwenX ; Wu, > Jingjing ; Xing, Beilei ; Zeng, > ZhichaoX > Subject: [PATCH v4] net/iavf: fix mbuf release API selection > > When AVX2 is forcibly selected and outer checksum offload is configured, the > basic Tx path will be selected. Also, the txq mbuf release API is incorrectly > set > to iavf_tx_queue_release_mbufs_sse. This causes coredump. > > This commit selects release_txq_mbufs to releasing txq mbufs when selecting > the basic Tx path. > > Fixes: 22f1e7608ebc ("net/iavf: fix AVX2 Tx selection") > Cc: sta...@dpdk.org > > Signed-off-by: Kaiwen Deng Acked-by: Qi Zhang Applied to dpdk-next-net-intel. Thanks Qi
Re: [RESEND v7 1/3] ring: fix unmatched type definition and usage
Acked-by: Huisong Li 在 2023/11/9 18:20, Jie Hai 写道: Field 'flags' of struct rte_ring is defined as int type. However, it is used as unsigned int. To ensure consistency, change the type of flags to unsigned int. Since these two types has the same byte size, this change is not an ABI change. Fixes: af75078fece3 ("first public release") Signed-off-by: Jie Hai Acked-by: Konstantin Ananyev Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/rte_ring_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h index b7708730658a..14dac6495d83 100644 --- a/lib/ring/rte_ring_core.h +++ b/lib/ring/rte_ring_core.h @@ -119,7 +119,7 @@ struct rte_ring_hts_headtail { struct rte_ring { char name[RTE_RING_NAMESIZE] __rte_cache_aligned; /**< Name of the ring. */ - int flags; /**< Flags supplied at creation. */ + uint32_t flags; /**< Flags supplied at creation. */ const struct rte_memzone *memzone; /**< Memzone, if any, containing the rte_ring */ uint32_t size; /**< Size of ring. */
Re: [RESEND v7 2/3] ring: add telemetry cmd to list rings
Acked-by: Huisong Li 在 2023/11/9 18:20, Jie Hai 写道: Add a telemetry command to list the rings used in the system. An example using this command is shown below: --> /ring/list { "/ring/list": [ "HT_:7d:00.2", "MP_mb_pool_0" ] } Signed-off-by: Jie Hai Acked-by: Konstantin Ananyev Reviewed-by: Honnappa Nagarahalli Acked-by: Huisong Li Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/meson.build | 1 + lib/ring/rte_ring.c | 40 2 files changed, 41 insertions(+) diff --git a/lib/ring/meson.build b/lib/ring/meson.build index c20685c689ac..7fca958ed7fa 100644 --- a/lib/ring/meson.build +++ b/lib/ring/meson.build @@ -18,3 +18,4 @@ indirect_headers += files ( 'rte_ring_rts.h', 'rte_ring_rts_elem_pvt.h', ) +deps += ['telemetry'] diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c index 057d25ff6f2f..6a10280fb093 100644 --- a/lib/ring/rte_ring.c +++ b/lib/ring/rte_ring.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "rte_ring.h" #include "rte_ring_elem.h" @@ -418,3 +419,42 @@ rte_ring_lookup(const char *name) return r; } + +static void +ring_walk(void (*func)(struct rte_ring *, void *), void *arg) +{ + struct rte_ring_list *ring_list; + struct rte_tailq_entry *tailq_entry; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + rte_mcfg_tailq_read_lock(); + + TAILQ_FOREACH(tailq_entry, ring_list, next) { + (*func)((struct rte_ring *) tailq_entry->data, arg); + } + + rte_mcfg_tailq_read_unlock(); +} + +static void +ring_list_cb(struct rte_ring *r, void *arg) +{ + struct rte_tel_data *d = (struct rte_tel_data *)arg; + + rte_tel_data_add_array_string(d, r->name); +} + +static int +ring_handle_list(const char *cmd __rte_unused, + const char *params __rte_unused, struct rte_tel_data *d) +{ + rte_tel_data_start_array(d, RTE_TEL_STRING_VAL); + ring_walk(ring_list_cb, d); + return 0; +} + +RTE_INIT(ring_init_telemetry) +{ + rte_telemetry_register_cmd("/ring/list", ring_handle_list, + "Returns list of available rings. Takes no parameters"); +}
Re: [RESEND v7 3/3] ring: add telemetry cmd for ring info
Acked-by: Huisong Li 在 2023/11/9 18:20, Jie Hai 写道: This patch supports dump of ring information by its name. An example using this command is shown below: --> /ring/info,MP_mb_pool_0 { "/ring/info": { "name": "MP_mb_pool_0", "socket": 0, "flags": "0x0", "producer_type": "MP", "consumer_type": "MC", "size": 262144, "mask": "0x3", "capacity": 262143, "used_count": 153197, "mz_name": "RG_MP_mb_pool_0", "mz_len": 2097536, "mz_hugepage_sz": 1073741824, "mz_socket_id": 0, "mz_flags": "0x0" } } Signed-off-by: Jie Hai Reviewed-by: Honnappa Nagarahalli Acked-by: Konstantin Ananyev Acked-by: Huisong Li Acked-by: Chengwen Feng Acked-by: Morten Brørup --- lib/ring/rte_ring.c | 95 + 1 file changed, 95 insertions(+) diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c index 6a10280fb093..5ee15037548e 100644 --- a/lib/ring/rte_ring.c +++ b/lib/ring/rte_ring.c @@ -453,8 +453,103 @@ ring_handle_list(const char *cmd __rte_unused, return 0; } +static const char * +ring_prod_sync_type_to_name(struct rte_ring *r) +{ + switch (r->prod.sync_type) { + case RTE_RING_SYNC_MT: + return "MP"; + case RTE_RING_SYNC_ST: + return "SP"; + case RTE_RING_SYNC_MT_RTS: + return "MP_RTS"; + case RTE_RING_SYNC_MT_HTS: + return "MP_HTS"; + default: + return "Unknown"; + } +} + +static const char * +ring_cons_sync_type_to_name(struct rte_ring *r) +{ + switch (r->cons.sync_type) { + case RTE_RING_SYNC_MT: + return "MC"; + case RTE_RING_SYNC_ST: + return "SC"; + case RTE_RING_SYNC_MT_RTS: + return "MC_RTS"; + case RTE_RING_SYNC_MT_HTS: + return "MC_HTS"; + default: + return "Unknown"; + } +} + +struct ring_info_cb_arg { + char *ring_name; + struct rte_tel_data *d; +}; + +static void +ring_info_cb(struct rte_ring *r, void *arg) +{ + struct ring_info_cb_arg *ring_arg = (struct ring_info_cb_arg *)arg; + struct rte_tel_data *d = ring_arg->d; + const struct rte_memzone *mz; + + if (strncmp(r->name, ring_arg->ring_name, RTE_RING_NAMESIZE)) + return; + + rte_tel_data_add_dict_string(d, "name", r->name); + rte_tel_data_add_dict_int(d, "socket", r->memzone->socket_id); + rte_tel_data_add_dict_uint_hex(d, "flags", r->flags, 0); + rte_tel_data_add_dict_string(d, "producer_type", + ring_prod_sync_type_to_name(r)); + rte_tel_data_add_dict_string(d, "consumer_type", + ring_cons_sync_type_to_name(r)); + rte_tel_data_add_dict_uint(d, "size", r->size); + rte_tel_data_add_dict_uint_hex(d, "mask", r->mask, 0); + rte_tel_data_add_dict_uint(d, "capacity", r->capacity); + rte_tel_data_add_dict_uint(d, "used_count", rte_ring_count(r)); + + mz = r->memzone; + if (mz == NULL) + return; + rte_tel_data_add_dict_string(d, "mz_name", mz->name); + rte_tel_data_add_dict_uint(d, "mz_len", mz->len); + rte_tel_data_add_dict_uint(d, "mz_hugepage_sz", mz->hugepage_sz); + rte_tel_data_add_dict_int(d, "mz_socket_id", mz->socket_id); + rte_tel_data_add_dict_uint_hex(d, "mz_flags", mz->flags, 0); +} + +static int +ring_handle_info(const char *cmd __rte_unused, const char *params, + struct rte_tel_data *d) +{ + char name[RTE_RING_NAMESIZE] = {0}; + struct ring_info_cb_arg ring_arg; + + if (params == NULL || strlen(params) == 0 || + strlen(params) >= RTE_RING_NAMESIZE) + return -EINVAL; + + rte_strlcpy(name, params, RTE_RING_NAMESIZE); + + ring_arg.ring_name = name; + ring_arg.d = d; + + rte_tel_data_start_dict(d); + ring_walk(ring_info_cb, &ring_arg); + + return 0; +} + RTE_INIT(ring_init_telemetry) { rte_telemetry_register_cmd("/ring/list", ring_handle_list, "Returns list of available rings. Takes no parameters"); + rte_telemetry_register_cmd("/ring/info", ring_handle_info, + "Returns ring info. Parameters: ring_name."); }
[Bug 1312] When iterating through the mbufs, mbuf->nb_segs indicates there are 21 segments, but when reaching the 8th mbuf, its mbuf->next pointer is NULL
https://bugs.dpdk.org/show_bug.cgi?id=1312 Bug ID: 1312 Summary: When iterating through the mbufs, mbuf->nb_segs indicates there are 21 segments, but when reaching the 8th mbuf, its mbuf->next pointer is NULL Product: DPDK Version: 20.11 Hardware: ARM OS: Linux Status: UNCONFIRMED Severity: normal Priority: Normal Component: core Assignee: dev@dpdk.org Reporter: tingsong.zh...@gmail.com Target Milestone: --- The version of DPDK is 20.11. When iterating through the mbufs, mbuf->nb_segs indicates there are 21 segments, but when reaching the 8th mbuf, its mbuf->next pointer is NULL. I used ASAN tool to check and found no memory out-of-bounds errors before encountering the NULL value in the mbuf's 'next' pointer. My scenario involves sending approximately 30,000 large packets using a sender via the kernel protocol stack, where the network card's MTU is 1500. On the receiver side, I'm using DPDK to capture these packets. In Thread 1, I'm receiving the packets using rte_eth_rx_burst, then processing them through the IP fragment reassembly process. Upon receiving a complete packet, I enqueue the mbuf into an 'rte_ring' named 'test'. In Thread 2, I dequeue the mbufs from the 'test' rte_ring. I then iterate through these mbufs to copy data from each segment mbuf. However, I encounter an issue with some mbufs having a 'next' pointer set as NULL." -- You are receiving this mail because: You are the assignee for the bug.
Re: [PATCH v3 01/10] test/bbdev: fix python script subprocess
On 11/4/23 00:34, Nicolas Chautru wrote: From: Hernan Vargas test-bbdev.py relying on non-recommended subprocess Popen. This can lead to instabilities where the process cannot be stopped with a sig TERM. Use subprocess run with proper timeout argument. Fixes: f714a18885a6 ("app/testbbdev: add test application for bbdev") Cc: sta...@dpdk.org Signed-off-by: Hernan Vargas --- app/test-bbdev/test-bbdev.py | 29 + 1 file changed, 13 insertions(+), 16 deletions(-) Reviewed-by: Maxime Coquelin Thanks, Maxime
Re: [PATCH v3 02/10] test/bbdev: rename macros from acc200 to vrb
On 11/4/23 00:34, Nicolas Chautru wrote: Renaming ACC200 macros to use generic intel vRAN Boost (VRB). No functional impact. Fixes: 69a9d9e139d2 ("baseband/acc: rename files from acc200 to vrb") Cc: sta...@dpdk.org Signed-off-by: Hernan Vargas Signed-off-by: Nicolas Chautru --- app/test-bbdev/test_bbdev_perf.c | 91 1 file changed, 45 insertions(+), 46 deletions(-) Reviewed-by: Maxime Coquelin Thanks, Maxime
Re: [PATCH v3 03/10] test/bbdev: handle exception for LLR generation
On 11/4/23 00:34, Nicolas Chautru wrote: From: Hernan Vargas Add range limit to prevent LLR generation greater than the data buffer size. Fixes: 7831a9684356 ("test/bbdev: support BLER for 4G") Cc: sta...@dpdk.org Signed-off-by: Hernan Vargas --- app/test-bbdev/test_bbdev_perf.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 09e4549b10..468cfeccd2 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -1838,6 +1838,12 @@ generate_turbo_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, range = ref_op->turbo_dec.input.length; N0 = 1.0 / pow(10.0, get_snr() / 10.0); + if (range > inputs[0].data->data_len) { + printf("Warning: Limiting LLR generation to first segment (%d from %d)\n", + inputs[0].data->data_len, range); + range = inputs[0].data->data_len; + } + for (i = 0; i < n; ++i) { m = inputs[i].data; int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); Reviewed-by: Maxime Coquelin Thanks, Maxime
[PATCH] net/ena: fix coverity issues
From: Shai Brandes Changed the rte_memcpy call to use the precomputed buf_size. Rearranged the ena adapter structure and removed redundant '&' operators as a precaution. Coverity issue: 405363 Coverity issue: 405357 Coverity issue: 405359 Fixes: 92401abfbcb9 ("net/ena: support connection tracking stats") Signed-off-by: Shai Brandes --- drivers/net/ena/ena_ethdev.c | 21 ++--- drivers/net/ena/ena_ethdev.h | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index dc846d2e84..53e7251874 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -531,8 +531,8 @@ ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, ({ ENA_TOUCH(rsp); ENA_TOUCH(ena_dev); - if (stats != (struct ena_admin_eni_stats *)&adapter->metrics_stats) - rte_memcpy(stats, &adapter->metrics_stats, sizeof(*stats)); + if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) + rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats)); }), struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); @@ -590,9 +590,8 @@ ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET, ({ ENA_TOUCH(rsp); ENA_TOUCH(ena_dev); - ENA_TOUCH(buf_size); - if (buf != (char *)&adapter->metrics_stats) - rte_memcpy(buf, &adapter->metrics_stats, adapter->metrics_num * sizeof(uint64_t)); + if (buf != (char *)adapter->metrics_stats) + rte_memcpy(buf, adapter->metrics_stats, buf_size); }), struct ena_com_dev *ena_dev, char *buf, size_t buf_size); @@ -3240,7 +3239,7 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, } static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, -size_t num_metrics) +size_t num_metrics) { struct ena_com_dev *ena_dev = &adapter->ena_dev; int rc; @@ -3252,10 +3251,10 @@ static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf } rte_spinlock_lock(&adapter->admin_lock); rc = ENA_PROXY(adapter, - ena_com_get_customer_metrics, - &adapter->ena_dev, - (char *)buf, - num_metrics * sizeof(uint64_t)); + ena_com_get_customer_metrics, + &adapter->ena_dev, + (char *)buf, + num_metrics * sizeof(uint64_t)); rte_spinlock_unlock(&adapter->admin_lock); if (rc != 0) { PMD_DRV_LOG(WARNING, "Failed to get customer metrics, rc: %d\n", rc); @@ -4088,7 +4087,7 @@ ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer) case ENA_MP_CUSTOMER_METRICS_GET: res = ena_com_get_customer_metrics(ena_dev, (char *)adapter->metrics_stats, - sizeof(uint64_t) * adapter->metrics_num); + adapter->metrics_num * sizeof(uint64_t)); break; case ENA_MP_SRD_STATS_GET: res = ena_com_get_ena_srd_info(ena_dev, diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h index 4988fbffb5..17d292101c 100644 --- a/drivers/net/ena/ena_ethdev.h +++ b/drivers/net/ena/ena_ethdev.h @@ -344,8 +344,8 @@ struct ena_adapter { * Helper variables for holding the information about the supported * metrics. */ - uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS] __rte_cache_aligned; - uint16_t metrics_num; + uint16_t metrics_num __rte_cache_aligned; + uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; struct ena_stats_srd srd_stats __rte_cache_aligned; }; -- 2.17.1
Re: [PATCH 0/2] app/test: fix unit test fail on RSS
On 11/9/2023 10:05 AM, Jie Hai wrote: > This patch fixes some bugs on test_link_bonding_rssconf. > > Jie Hai (2): > net/null: fix unit test fail on RSS > app/test: fix uninitialized RSS configuration > For series, Acked-by: Ferruh Yigit Commit logs updated while merging. Series applied to dpdk-next-net/main, thanks.
Re: [PATCH] net/ena: fix coverity issues
On 11/9/2023 2:08 PM, shaib...@amazon.com wrote: > From: Shai Brandes > > Changed the rte_memcpy call to use the precomputed buf_size. > Rearranged the ena adapter structure and removed redundant > '&' operators as a precaution. > What is the reason of the structure rearrange? > Coverity issue: 405363 > Coverity issue: 405357 > Coverity issue: 405359 > Can you please split the patch per each fix if they are not logically related or caused from same code. > Fixes: 92401abfbcb9 ("net/ena: support connection tracking stats") > Signed-off-by: Shai Brandes > --- > drivers/net/ena/ena_ethdev.c | 21 ++--- > drivers/net/ena/ena_ethdev.h | 4 ++-- > 2 files changed, 12 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c > index dc846d2e84..53e7251874 100644 > --- a/drivers/net/ena/ena_ethdev.c > +++ b/drivers/net/ena/ena_ethdev.c > @@ -531,8 +531,8 @@ ENA_PROXY_DESC(ena_com_get_eni_stats, > ENA_MP_ENI_STATS_GET, > ({ > ENA_TOUCH(rsp); > ENA_TOUCH(ena_dev); > - if (stats != (struct ena_admin_eni_stats *)&adapter->metrics_stats) > - rte_memcpy(stats, &adapter->metrics_stats, sizeof(*stats)); > + if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) > + rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats)); > }), > struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); > > @@ -590,9 +590,8 @@ ENA_PROXY_DESC(ena_com_get_customer_metrics, > ENA_MP_CUSTOMER_METRICS_GET, > ({ > ENA_TOUCH(rsp); > ENA_TOUCH(ena_dev); > - ENA_TOUCH(buf_size); > - if (buf != (char *)&adapter->metrics_stats) > - rte_memcpy(buf, &adapter->metrics_stats, adapter->metrics_num * > sizeof(uint64_t)); > + if (buf != (char *)adapter->metrics_stats) > + rte_memcpy(buf, adapter->metrics_stats, buf_size); > }), > struct ena_com_dev *ena_dev, char *buf, size_t buf_size); > > @@ -3240,7 +3239,7 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, > struct rte_mbuf **tx_pkts, > } > > static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t > *buf, > - size_t num_metrics) > + size_t num_metrics) > Please drop unrelated changed from the set. > { > struct ena_com_dev *ena_dev = &adapter->ena_dev; > int rc; > @@ -3252,10 +3251,10 @@ static void ena_copy_customer_metrics(struct > ena_adapter *adapter, uint64_t *buf > } > rte_spinlock_lock(&adapter->admin_lock); > rc = ENA_PROXY(adapter, > - ena_com_get_customer_metrics, > - &adapter->ena_dev, > - (char *)buf, > - num_metrics * sizeof(uint64_t)); > +ena_com_get_customer_metrics, > +&adapter->ena_dev, > +(char *)buf, > +num_metrics * sizeof(uint64_t)); > ditto > rte_spinlock_unlock(&adapter->admin_lock); > if (rc != 0) { > PMD_DRV_LOG(WARNING, "Failed to get customer metrics, > rc: %d\n", rc); > @@ -4088,7 +4087,7 @@ ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, > const void *peer) > case ENA_MP_CUSTOMER_METRICS_GET: > res = ena_com_get_customer_metrics(ena_dev, > (char *)adapter->metrics_stats, > - sizeof(uint64_t) * adapter->metrics_num); > + adapter->metrics_num * sizeof(uint64_t)); > Does above change makes any difference? What is the motivation? > break; > case ENA_MP_SRD_STATS_GET: > res = ena_com_get_ena_srd_info(ena_dev, > diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h > index 4988fbffb5..17d292101c 100644 > --- a/drivers/net/ena/ena_ethdev.h > +++ b/drivers/net/ena/ena_ethdev.h > @@ -344,8 +344,8 @@ struct ena_adapter { >* Helper variables for holding the information about the supported >* metrics. >*/ > - uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS] __rte_cache_aligned; > - uint16_t metrics_num; > + uint16_t metrics_num __rte_cache_aligned; > + uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; > struct ena_stats_srd srd_stats __rte_cache_aligned; > }; >
RE: [PATCH] net/ena: fix coverity issues
See inside > -Original Message- > From: Ferruh Yigit > Sent: Thursday, November 9, 2023 4:30 PM > To: Brandes, Shai > Cc: dev@dpdk.org; Beider, Ron ; Atrash, Wajeeh > ; Bernstein, Amit > Subject: RE: [EXTERNAL] [PATCH] net/ena: fix coverity issues > > CAUTION: This email originated from outside of the organization. Do not click > links or open attachments unless you can confirm the sender and know the > content is safe. > > > > On 11/9/2023 2:08 PM, shaib...@amazon.com wrote: > > From: Shai Brandes > > > > Changed the rte_memcpy call to use the precomputed buf_size. > > Rearranged the ena adapter structure and removed redundant '&' > > operators as a precaution. > > > > What is the reason of the structure rearrange? [Brandes, Shai] Sorry, I should have included a cover letter to better explain the problem. We debugged the Coverity issues and did not find any real issue, all addresses, lenghts and accesses were as expected (false-positive). However, as precaution we decided to change few locations in the code that might have confused the Coverity but can easily worked around. 1. In the case of the structure, we wanted to make sure that "metrics_stats" array and the "metrics_num" fields reside in the same cache line, We originally set "uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS] __rte_cache_aligned; " but setting this alignment on array (as opposed to structure for example) might have confused Coverity (I know it shouldn't be the case, but still...). Switching the fields and setting the alignment on the "metrics_num" seems straight forward change and we verified with Pahole that both still reside in the same cache line (56 bytes total) and that the overall padding for both cases is identical (14B padding, just with different partition). 2. The other problematic change was to remove some redundant "&" when providing memcpy with the address of the destination array ("&array" instead of just "array"). The "&" is not needed but should be harmless C-wise (double checked it by printing this array address both ways, with and without the "&"). Again, we suspect it might have confuses Coverity, so decided to go on the safe side. Please acknowledge if these changes makes sense, and I will upload an updated patch. > > > > Coverity issue: 405363 > > Coverity issue: 405357 > > Coverity issue: 405359 > > > > Can you please split the patch per each fix if they are not logically related > or > caused from same code. [Brandes, Shai] all cases are related to the same exact rte_memcpy line that allegedly access the source buffer at 160B offset although its length is 48B. > > > Fixes: 92401abfbcb9 ("net/ena: support connection tracking stats") > > Signed-off-by: Shai Brandes > > --- > > drivers/net/ena/ena_ethdev.c | 21 ++--- > > drivers/net/ena/ena_ethdev.h | 4 ++-- > > 2 files changed, 12 insertions(+), 13 deletions(-) > > > > diff --git a/drivers/net/ena/ena_ethdev.c > > b/drivers/net/ena/ena_ethdev.c index dc846d2e84..53e7251874 100644 > > --- a/drivers/net/ena/ena_ethdev.c > > +++ b/drivers/net/ena/ena_ethdev.c > > @@ -531,8 +531,8 @@ ENA_PROXY_DESC(ena_com_get_eni_stats, > > ENA_MP_ENI_STATS_GET, ({ > > ENA_TOUCH(rsp); > > ENA_TOUCH(ena_dev); > > - if (stats != (struct ena_admin_eni_stats *)&adapter->metrics_stats) > > - rte_memcpy(stats, &adapter->metrics_stats, sizeof(*stats)); > > + if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) > > + rte_memcpy(stats, adapter->metrics_stats, > > + sizeof(*stats)); > > }), > > struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); > > > > @@ -590,9 +590,8 @@ > ENA_PROXY_DESC(ena_com_get_customer_metrics, > > ENA_MP_CUSTOMER_METRICS_GET, ({ > > ENA_TOUCH(rsp); > > ENA_TOUCH(ena_dev); > > - ENA_TOUCH(buf_size); > > - if (buf != (char *)&adapter->metrics_stats) > > - rte_memcpy(buf, &adapter->metrics_stats, adapter->metrics_num > * sizeof(uint64_t)); > > + if (buf != (char *)adapter->metrics_stats) > > + rte_memcpy(buf, adapter->metrics_stats, buf_size); > > }), > > struct ena_com_dev *ena_dev, char *buf, size_t buf_size); > > > > @@ -3240,7 +3239,7 @@ static uint16_t eth_ena_xmit_pkts(void > > *tx_queue, struct rte_mbuf **tx_pkts, } > > > > static void ena_copy_customer_metrics(struct ena_adapter *adapter, > uint64_t *buf, > > - size_t num_metrics) > > + size_t num_metrics) > > > > Please drop unrelated changed from the set. [Brandes, Shai] ack > > > { > > struct ena_com_dev *ena_dev = &adapter->ena_dev; > > int rc; > > @@ -3252,10 +3251,10 @@ static void ena_copy_customer_metrics(struct > ena_adapter *adapter, uint64_t *buf > > } > > rte_spinlock_lock(&adapter->admin_lock); > > rc = ENA_PROXY(adapter, > > -
RE: [PATCH v3 10/10] test/bbdev: update python script parameters
OK with me Maxime, thanks. > -Original Message- > From: Maxime Coquelin > Sent: Thursday, November 9, 2023 2:16 AM > To: Chautru, Nicolas ; dev@dpdk.org > Cc: hemant.agra...@nxp.com; Marchand, David > ; Vargas, Hernan > ; sta...@dpdk.org > Subject: Re: [PATCH v3 10/10] test/bbdev: update python script parameters > > > > On 11/4/23 00:34, Nicolas Chautru wrote: > > +# This will become -t option for iter_max in next release > > +parser.add_argument("-x", "--iter-max", > > +type=int, > > +help="Max iterations", > > +default=6) > > No need for "-x", it is possible to only have the long name: > > parser.add_argument("--iter-max", > type=int, > help="Max iterations", > default=6) > > If that works for you, I can do the change while applying. > > Regards, > Maxime
Re: [PATCH v3 07/10] test/bbdev: add MLD support
On 11/4/23 00:34, Nicolas Chautru wrote: From: Hernan Vargas Adding test-bbdev support for the MLD-TS processing specific to the VRB2 variant. Signed-off-by: Hernan Vargas Reviewed-by: Maxime Coquelin --- app/test-bbdev/test_bbdev_perf.c | 519 + app/test-bbdev/test_bbdev_vector.c | 132 app/test-bbdev/test_bbdev_vector.h | 1 + 3 files changed, 652 insertions(+) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 5f1e5de002..d2e3542356 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -139,6 +139,7 @@ struct test_op_params { struct rte_bbdev_dec_op *ref_dec_op; struct rte_bbdev_enc_op *ref_enc_op; struct rte_bbdev_fft_op *ref_fft_op; + struct rte_bbdev_mldts_op *ref_mldts_op; uint16_t burst_sz; uint16_t num_to_process; uint16_t num_lcores; @@ -165,6 +166,7 @@ struct thread_params { struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; struct rte_bbdev_fft_op *fft_ops[MAX_BURST]; + struct rte_bbdev_mldts_op *mldts_ops[MAX_BURST]; }; /* Stores time statistics */ @@ -472,6 +474,18 @@ check_dev_cap(const struct rte_bbdev_info *dev_info) return TEST_FAILED; } return TEST_SUCCESS; + } else if (op_cap->type == RTE_BBDEV_OP_MLDTS) { + const struct rte_bbdev_op_cap_mld *cap = &op_cap->cap.mld; + if (!flags_match(test_vector.mldts.op_flags, cap->capability_flags)) { + printf("Flag Mismatch\n"); + return TEST_FAILED; + } + if (nb_inputs > cap->num_buffers_src) { + printf("Too many inputs defined: %u, max: %u\n", + nb_inputs, cap->num_buffers_src); + return TEST_FAILED; + } + return TEST_SUCCESS; } } @@ -822,6 +836,9 @@ add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, conf.arb_fft[i].gbr_threshold1 = VRB_QOS_GBR; conf.arb_fft[i].gbr_threshold1 = VRB_QOS_GBR; conf.arb_fft[i].round_robin_weight = VRB_QMGR_RR; + conf.arb_mld[i].gbr_threshold1 = VRB_QOS_GBR; + conf.arb_mld[i].gbr_threshold1 = VRB_QOS_GBR; + conf.arb_mld[i].round_robin_weight = VRB_QMGR_RR; } conf.input_pos_llr_1_bit = true; @@ -847,6 +864,10 @@ add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, conf.q_fft.num_qgroups = VRB_QMGR_NUM_QGS; conf.q_fft.first_qgroup_index = VRB_QMGR_INVALID_IDX; conf.q_fft.num_aqs_per_groups = VRB_QMGR_NUM_AQS; + conf.q_mld.num_qgroups = VRB_QMGR_NUM_QGS; + conf.q_mld.first_qgroup_index = VRB_QMGR_INVALID_IDX; + conf.q_mld.num_aqs_per_groups = VRB_QMGR_NUM_AQS; + conf.q_mld.aq_depth_log2 = VRB_QMGR_AQ_DEPTH; /* setup PF with configuration information */ ret = rte_acc_configure(info->dev_name, &conf); @@ -1979,6 +2000,31 @@ copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n, } } +static void +copy_reference_mldts_op(struct rte_bbdev_mldts_op **ops, unsigned int n, + unsigned int start_idx, + struct rte_bbdev_op_data *q_inputs, + struct rte_bbdev_op_data *r_inputs, + struct rte_bbdev_op_data *outputs, + struct rte_bbdev_mldts_op *ref_op) +{ + unsigned int i, j; + struct rte_bbdev_op_mldts *mldts = &ref_op->mldts; + for (i = 0; i < n; i++) { + ops[i]->mldts.c_rep = mldts->c_rep; + ops[i]->mldts.num_layers = mldts->num_layers; + ops[i]->mldts.num_rbs = mldts->num_rbs; + ops[i]->mldts.op_flags = mldts->op_flags; + for (j = 0; j < RTE_BBDEV_MAX_MLD_LAYERS; j++) + ops[i]->mldts.q_m[j] = mldts->q_m[j]; + ops[i]->mldts.r_rep = mldts->r_rep; + ops[i]->mldts.c_rep = mldts->c_rep; + ops[i]->mldts.r_input = r_inputs[start_idx + i]; + ops[i]->mldts.qhy_input = q_inputs[start_idx + i]; + ops[i]->mldts.output = outputs[start_idx + i]; + } +} + static int check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, unsigned int order_idx, const int expected_status) @@ -2039,6 +2085,21 @@ check_fft_status_and_ordering(struct rte_bbdev_fft_op *op, return TEST_SUCCESS; } +static int +check_mldts_status_and_ordering(struct rte_bbdev_mldts_op *op, + unsigned
Re: [PATCH v1 0/1] baseband/acc: fix for VRB1 PMD
On 11/4/23 00:45, Nicolas Chautru wrote: Fix bug for the VRB1 PMD in LDPC Encoder TB mode. Targeting the rc3 if possible. Nicolas Chautru (1): baseband/acc: fix for TB mode on VRB1 drivers/baseband/acc/rte_vrb_pmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Applied to next-baseband/for-main. Thanks, Maxime
Re: [PATCH v3 0/2] FlexRAN SDK update for 23.11
On 10/28/23 00:03, Nicolas Chautru wrote: v3: rebase typo fixed. v2: rebase typo fixed. Upstreaming SDK workaround for ACC100 and updating documentation for new SDK release. Hernan Vargas (2): baseband/acc: support ACC100 deRM corner case SDK doc: update FlexRAN SDK links doc/guides/bbdevs/turbo_sw.rst| 53 +--- drivers/baseband/acc/meson.build | 21 ++ drivers/baseband/acc/rte_acc100_pmd.c | 59 +-- 3 files changed, 106 insertions(+), 27 deletions(-) Applied to next-baseband/for-main. Thanks, Maxime
Re: [PATCH v3 00/10] test-bbdev changes for 23.11
On 11/4/23 00:34, Nicolas Chautru wrote: v3: python script argument fix marked as deprecation first, with formal fix in following release. Maxime let me know if you want to something more formal in the release notes to call it out, currently embedded in script and commit message. typo correction. v2: adding fixes for some of the commits requested by Maxime. Update test-bbdev for 23.11. Hernan Vargas (8): test/bbdev: fix python script subprocess test/bbdev: handle exception for LLR generation test/bbdev: improve test log messages test/bbdev: assert failed test for queue configure test/bbdev: ldpc encoder concatenation vector test/bbdev: add MLD support test/bbdev: support new FFT capabilities test/bbdev: support 4 bit LLR compression Nicolas Chautru (2): test/bbdev: rename macros from acc200 to vrb test/bbdev: update python script parameters app/test-bbdev/main.c | 3 +- app/test-bbdev/test-bbdev.py | 55 ++- app/test-bbdev/test_bbdev.c| 3 +- app/test-bbdev/test_bbdev_perf.c | 673 ++--- app/test-bbdev/test_bbdev_vector.c | 199 - app/test-bbdev/test_bbdev_vector.h | 1 + 6 files changed, 843 insertions(+), 91 deletions(-) Applied to next-baseband/for-main with agreed changes. Thanks, Maxime
Re: [PATCH] net/ena: fix coverity issues
On 11/9/2023 3:25 PM, Brandes, Shai wrote: > See inside > >> -Original Message- >> From: Ferruh Yigit >> Sent: Thursday, November 9, 2023 4:30 PM >> To: Brandes, Shai >> Cc: dev@dpdk.org; Beider, Ron ; Atrash, Wajeeh >> ; Bernstein, Amit >> Subject: RE: [EXTERNAL] [PATCH] net/ena: fix coverity issues >> >> CAUTION: This email originated from outside of the organization. Do not click >> links or open attachments unless you can confirm the sender and know the >> content is safe. >> >> >> >> On 11/9/2023 2:08 PM, shaib...@amazon.com wrote: >>> From: Shai Brandes >>> >>> Changed the rte_memcpy call to use the precomputed buf_size. >>> Rearranged the ena adapter structure and removed redundant '&' >>> operators as a precaution. >>> >> >> What is the reason of the structure rearrange? > [Brandes, Shai] Sorry, I should have included a cover letter to better > explain the problem. > We debugged the Coverity issues and did not find any real issue, all > addresses, lenghts and accesses were as expected (false-positive). > However, as precaution we decided to change few locations in the code that > might have confused the Coverity but can easily worked around. > 1. In the case of the structure, we wanted to make sure that "metrics_stats" > array and the "metrics_num" fields reside in the same cache line, > We originally set "uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS] > __rte_cache_aligned; " but setting this alignment on array (as opposed to > structure for example) might have confused Coverity (I know it shouldn't be > the case, but still...). > Switching the fields and setting the alignment on the "metrics_num" seems > straight forward change and we verified with Pahole that both still reside in > the same cache line (56 bytes total) and that the overall padding for both > cases is identical (14B padding, just with different partition). > 2. The other problematic change was to remove some redundant "&" when > providing memcpy with the address of the destination array ("&array" instead > of just "array"). The "&" is not needed but should be harmless C-wise (double > checked it by printing this array address both ways, with and without the > "&"). Again, we suspect it might have confuses Coverity, so decided to go on > the safe side. > Please acknowledge if these changes makes sense, and I will upload an updated > patch. > > Got it, thanks for the clarification. There are a set of 'rte_memcpy' related coverity warnings, I assume it is because Coverity is confused from the 'rte_memcpy' implementation. You may be observing same warnings, didn't check. As far as I understand this patch has some refactoring but it is not clear if these changes will help coverity issue or not. In that case commit log needs to be updated and coverity tag needs to be removed, but changes are not functional or doesn't help development, perhaps changes can wait until related code updated for some functional reason. >> >> >>> Coverity issue: 405363 >>> Coverity issue: 405357 >>> Coverity issue: 405359 >>> >> >> Can you please split the patch per each fix if they are not logically >> related or >> caused from same code. > [Brandes, Shai] all cases are related to the same exact rte_memcpy line that > allegedly access the source buffer at 160B offset although its length is 48B. > >> >>> Fixes: 92401abfbcb9 ("net/ena: support connection tracking stats") >>> Signed-off-by: Shai Brandes >>> --- >>> drivers/net/ena/ena_ethdev.c | 21 ++--- >>> drivers/net/ena/ena_ethdev.h | 4 ++-- >>> 2 files changed, 12 insertions(+), 13 deletions(-) >>> >>> diff --git a/drivers/net/ena/ena_ethdev.c >>> b/drivers/net/ena/ena_ethdev.c index dc846d2e84..53e7251874 100644 >>> --- a/drivers/net/ena/ena_ethdev.c >>> +++ b/drivers/net/ena/ena_ethdev.c >>> @@ -531,8 +531,8 @@ ENA_PROXY_DESC(ena_com_get_eni_stats, >>> ENA_MP_ENI_STATS_GET, ({ >>> ENA_TOUCH(rsp); >>> ENA_TOUCH(ena_dev); >>> - if (stats != (struct ena_admin_eni_stats *)&adapter->metrics_stats) >>> - rte_memcpy(stats, &adapter->metrics_stats, sizeof(*stats)); >>> + if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) >>> + rte_memcpy(stats, adapter->metrics_stats, >>> + sizeof(*stats)); >>> }), >>> struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); >>> >>> @@ -590,9 +590,8 @@ >> ENA_PROXY_DESC(ena_com_get_customer_metrics, >>> ENA_MP_CUSTOMER_METRICS_GET, ({ >>> ENA_TOUCH(rsp); >>> ENA_TOUCH(ena_dev); >>> - ENA_TOUCH(buf_size); >>> - if (buf != (char *)&adapter->metrics_stats) >>> - rte_memcpy(buf, &adapter->metrics_stats, adapter->metrics_num >> * sizeof(uint64_t)); >>> + if (buf != (char *)adapter->metrics_stats) >>> + rte_memcpy(buf, adapter->metrics_stats, buf_size); >>> }), >>> struct ena_com_dev *ena_dev, char *buf, size_t buf_size); >>> >>> @@ -3240,7 +3239,7 @@ static uint16_t eth_ena_xmit_pkts(void >>>
RE: [PATCH v3 2/5] dumpcap: allow multiple invocations
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Thursday, 9 November 2023 16.40 > > On Thu, 9 Nov 2023 08:50:10 +0100 > Morten Brørup wrote: > > > > Signed-off-by: Stephen Hemminger > > > --- > > > > Minor detail: getpid() returns int, so prefer %d over %u. > > Let me check, per man page. getpid() returns pid_t. > The typedef chain leads to: > pid_t -> __pid_t -> __PID_T_TYPE -> __S32_TYPE -> int32 -> int Thank you for confirming. So %d is preferred over %u for getpid(). :-)
RE: [PATCH] net/ena: fix coverity issues
> -Original Message- > From: Ferruh Yigit > Sent: Thursday, November 9, 2023 5:54 PM > To: Brandes, Shai > Cc: dev@dpdk.org; Beider, Ron ; Atrash, Wajeeh > ; Bernstein, Amit > Subject: RE: [EXTERNAL] [PATCH] net/ena: fix coverity issues > > CAUTION: This email originated from outside of the organization. Do not click > links or open attachments unless you can confirm the sender and know the > content is safe. > > > > On 11/9/2023 3:25 PM, Brandes, Shai wrote: > > See inside > > > >> -Original Message- > >> From: Ferruh Yigit > >> Sent: Thursday, November 9, 2023 4:30 PM > >> To: Brandes, Shai > >> Cc: dev@dpdk.org; Beider, Ron ; Atrash, Wajeeh > >> ; Bernstein, Amit > >> Subject: RE: [EXTERNAL] [PATCH] net/ena: fix coverity issues > >> > >> CAUTION: This email originated from outside of the organization. Do > >> not click links or open attachments unless you can confirm the sender > >> and know the content is safe. > >> > >> > >> > >> On 11/9/2023 2:08 PM, shaib...@amazon.com wrote: > >>> From: Shai Brandes > >>> > >>> Changed the rte_memcpy call to use the precomputed buf_size. > >>> Rearranged the ena adapter structure and removed redundant '&' > >>> operators as a precaution. > >>> > >> > >> What is the reason of the structure rearrange? > > [Brandes, Shai] Sorry, I should have included a cover letter to better > > explain > the problem. > > We debugged the Coverity issues and did not find any real issue, all > addresses, lenghts and accesses were as expected (false-positive). > > However, as precaution we decided to change few locations in the code > that might have confused the Coverity but can easily worked around. > > 1. In the case of the structure, we wanted to make sure that > > "metrics_stats" array and the "metrics_num" fields reside in the same > cache line, We originally set "uint64_t > metrics_stats[ENA_MAX_CUSTOMER_METRICS] __rte_cache_aligned; " but > setting this alignment on array (as opposed to structure for example) might > have confused Coverity (I know it shouldn't be the case, but still...). > > Switching the fields and setting the alignment on the "metrics_num" seems > straight forward change and we verified with Pahole that both still reside in > the same cache line (56 bytes total) and that the overall padding for both > cases is identical (14B padding, just with different partition). > > 2. The other problematic change was to remove some redundant "&" when > providing memcpy with the address of the destination array ("&array" > instead of just "array"). The "&" is not needed but should be harmless C-wise > (double checked it by printing this array address both ways, with and without > the "&"). Again, we suspect it might have confuses Coverity, so decided to go > on the safe side. > > Please acknowledge if these changes makes sense, and I will upload an > updated patch. > > > > > > Got it, thanks for the clarification. > > There are a set of 'rte_memcpy' related coverity warnings, I assume it is > because Coverity is confused from the 'rte_memcpy' implementation. > You may be observing same warnings, didn't check. > > As far as I understand this patch has some refactoring but it is not clear if > these changes will help coverity issue or not. > > In that case commit log needs to be updated and coverity tag needs to be > removed, but changes are not functional or doesn't help development, > perhaps changes can wait until related code updated for some functional > reason. [Brandes, Shai] OK, I will update the Coverity issues on the webpage and remove this patch for now. I see in Coverity additional issues where it warns on rte_memcpy accessing offset 160 which overruns the buffer (same offset 160 appears also in my Coverity issues). Thanks. > > >> > >> > >>> Coverity issue: 405363 > >>> Coverity issue: 405357 > >>> Coverity issue: 405359 > >>> > >> > >> Can you please split the patch per each fix if they are not logically > >> related or caused from same code. > > [Brandes, Shai] all cases are related to the same exact rte_memcpy line > that allegedly access the source buffer at 160B offset although its length is > 48B. > > > >> > >>> Fixes: 92401abfbcb9 ("net/ena: support connection tracking stats") > >>> Signed-off-by: Shai Brandes > >>> --- > >>> drivers/net/ena/ena_ethdev.c | 21 ++--- > >>> drivers/net/ena/ena_ethdev.h | 4 ++-- > >>> 2 files changed, 12 insertions(+), 13 deletions(-) > >>> > >>> diff --git a/drivers/net/ena/ena_ethdev.c > >>> b/drivers/net/ena/ena_ethdev.c index dc846d2e84..53e7251874 100644 > >>> --- a/drivers/net/ena/ena_ethdev.c > >>> +++ b/drivers/net/ena/ena_ethdev.c > >>> @@ -531,8 +531,8 @@ ENA_PROXY_DESC(ena_com_get_eni_stats, > >>> ENA_MP_ENI_STATS_GET, ({ > >>> ENA_TOUCH(rsp); > >>> ENA_TOUCH(ena_dev); > >>> - if (stats != (struct ena_admin_eni_stats *)&adapter->metrics_stats) > >>> - rte_memcpy(stats, &adapter->metrics_stats, sizeof(*stats)); > >>> + if (st
Re: [PATCH] app/testpmd: fix indirect action list parameters parsing
On 11/8/2023 4:34 PM, Gregory Etelson wrote: > Indirect actions list arguments parser was configured to place target > number into 64bit value, while the code provided 32bits memory. > Hi Gregory, Can you please give more details why 'id' needs to be 64 bits, with callstack or usecase etc? And please describe what is the observed problem with current code? Inside 'parse_indlst_id2ptr()', 'parse_int()' can work or 32bits and 64bits variables, so that one is OK. But both 'port_action_handle_get_by_id()' & 'indirect_action_list_conf_get()' gets 'id' as parameter and they get 32bits argument, when 'id' is 64bit won't it will be cast to 32bits and loose data, should those functions needs to be updated as well. > The patch updated variable size for translation results. > > Fixes: 72a3dec7126f ("ethdev: add indirect flow list action") > Signed-off-by: Gregory Etelson > --- > app/test-pmd/cmdline_flow.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c > index 0d521159e9..cf1ca33208 100644 > --- a/app/test-pmd/cmdline_flow.c > +++ b/app/test-pmd/cmdline_flow.c > @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const struct > token *token, > struct rte_flow_action *action = ctx->object; > struct rte_flow_action_indirect_list *action_conf; > const struct indlst_conf *indlst_conf; > - uint32_t id; > + uint64_t id; > int ret; > > if (!action) > @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const struct > token *token, > action_conf->handle = (typeof(action_conf->handle)) > port_action_handle_get_by_id(ctx->port, id); > if (!action_conf->handle) { > - printf("no indirect list handle for id %u\n", id); > + printf("no indirect list handle for id %"PRIu64"\n", > +id); > return -1; > } > break;
RE: [PATCH v3 4/5] pcapng: avoid using alloca()
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Thursday, 9 November 2023 16.45 > > On Thu, 9 Nov 2023 09:21:22 +0100 > Morten Brørup wrote: > > > I can't find the definition of BUFSIZ. Please make sure to add a > comment to the definition of BUFSIZ mentioning - like in your patch > description - that it will be more than sufficient for the info related > blocks in the file. > > > > More comments inline below, regarding existing bugs found while > reviewing. > > > > > > Assuming BUFSIZ has a comment describing the reason for its value, > > > > Acked-by: Morten Brørup > > The constant BUFSIZ comes from stdio.h and used lots of places in > libraries. > It is 8192 in current glibc and unlikely to be a problem. OK, didn't know that. So I looked it up, trying to learn more about it. I found two sources [1], [2] mentioning that BUFSIZ is guaranteed to be at least 256. [1]: https://www.gnu.org/software/libc/manual/html_node/Controlling-Buffering.html#BUFSIZ [2]: Page 234 in "The C Standard Library" by P.J. Plauger, ISBN: 0-13-131509-9, from 1992 If 256 suffices, then I am OK with using BUFSIZ. I hope the authors of the other libraries using BUFSIZ don't assume more than the C standard promises about it. > Chose it because this a on stack buffer used before writing to a file > which > is similar to what stdio does. > > The library does not use stdio because most of the I/O is writing > packets > which needs to be fast and overhead of extra stdio buffer is harmful. > Looking into using io_uring in a future version.
RE: [PATCH] config/x86: config support for AMD EPYC processors
> Hi Konstantin, Morten, > > > -Original Message- > > From: Konstantin Ananyev > > Sent: Tuesday, November 7, 2023 8:03 PM > > To: Morten Brørup ; Thomas Monjalon > > ; Kevin Traynor ; Tummala, > > Sivaprasad ; David Marchand > > ; Yigit, Ferruh ; > > bruce.richard...@intel.com; konstantin.v.anan...@yandex.ru; > > maxime.coque...@redhat.com; Aaron Conole > > Cc: dev@dpdk.org > > Subject: RE: [PATCH] config/x86: config support for AMD EPYC processors > > > > Caution: This message originated from an External Source. Use proper caution > > when opening attachments, clicking links, or responding. > > > > > > > > > > > > >> From: Tummala, Sivaprasad > > > > > > > > >>> From: David Marchand On Mon, > > > > > > > > >>> Sep 25, 2023 at 5:11 PM Sivaprasad Tummala > > > > > > > > From: Sivaprasad Tummala > > > > > > > > > > > > > > > > By default, max lcores are limited to 128 for x86 > > > > platforms. > > > > > > > > On AMD EPYC processors, this limit needs to be > > > > > > > > increased > > > > to > > > > > > > > leverage > > > > > > > > all the cores. > > > > > > > > > > > > > > > > The patch adjusts the limit specifically for native > > > > > > compilation on > > > > > > > > AMD EPYC CPUs. > > > > > > > > > > > > > > > > Signed-off-by: Sivaprasad Tummala > > > > > > > > > > > > >>> > > > > > > > > >>> This patch is a revamp of > > > > > > > > >>> > > > > > > > > >> > > > > > > > > > > > > > > > > > > > > http://inbox.dpdk.org/dev/BY5PR12MB3681C3FC6676BC03F0B42CCC96789@BY5 > > > > PR > > > > > > > > >>> 12MB3681.namprd12.prod.outlook.com/ > > > > > > > > >>> for which a discussion at techboard is supposed to have > > > > taken > > > > > > place. > > > > > > > > >>> But I didn't find a trace of it. > > > > > > > > >>> > > > > > > > > >>> One option that had been discussed in the previous > > > > > > > > >>> thread > > > > was > > > > > > to > > > > > > > > >>> increase the max number of cores for x86. > > > > > > > > >>> I am unclear if this option has been properly > > > > > > evaluated/debatted. > > > > > > > > > > > > > > Here are the minutes from the previous techboard discussions: > > > > > > > [1]: http://inbox.dpdk.org/dev/YZ43U36bFWHYClAi@platinum/ > > > > > > > [2]: > > > > http://inbox.dpdk.org/dev/20211202112506.68acaa1a@hermes.local/ > > > > > > > > > > > > > > AFAIK, there has been no progress with dynamic max_lcores, so > > > > > > > I > > > > guess > > > > > > the techboard's conclusion still stands: > > > > > > > > > > > > > > There is no identified use-case where a single application > > > > requires > > > > > > more than 128 lcores. If a case a use-case exists for a single > > > > > > application that uses more than 128 lcores, the TB is ok to > > > > > > update > > > > the > > > > > > default config value. > > > > > > > > > > > > > > > >>> > > > > > > > > >>> Can the topic be brought again at techboard? > > > > > > > > >> > > > > > > > > >> Hi David, > > > > > > > > >> > > > > > > > > >> The patch is intended to detect AMD platforms and enable > > > > > > > > >> all > > > > CPU > > > > > > > > cores by default > > > > > > > > >> on native builds. > > > > > > > > > > > > > > This is done on native ARM builds, so why not on native X86 > > > > builds > > > > > > too? > > > > > > > > > > > > > > > >> > > > > > > > > >> As an optimization for memory footprint, users can > > > > > > > > >> override > > > > this > > > > > > by > > > > > > > > specifying "- > > > > > > > > >> Dmax_lcores" option based on DPDK lcores required for > > > > > > > > >> their > > > > > > usecases. > > > > > > > > >> > > > > > > > > >> Sure, will request to add this topic for discussion at > > > > > > techboard. > > > > > > > > > > > > This is the summary of the techboard meeting: > > > > > > (see > > > > > > https://mails.dpdk.org/archives/dev/2023-October/279672.html) > > > > > > > > > > > > - There is some asks for more than 128 worker cores > > > > > > - Discussion about generally increasing the default max core > > > > > > count > > > > and > > > > > > trade-offs with memory consumption but this is longer term issue > > > > > > > > > > The distros are currently satisfied with the 128 cores default, so > > > > the decision here was: Leave the 128 cores default as is, for now. > > > > > > > > > > Any long term improvements regarding memory consumption of > > > > > many-core > > > > systems are not relevant for this patch. > > > > > > > > > > > - Acceptance for the direction of this patch in the short term > > > > > > > > > > With the twist that it must work for cross compile. It is the > > > > properties of the target CPU that matter, not the properties of the > > > > host > > > > > CPU. (Although the build may be "native", i.e. the target CPU is > > > > > the > > > > same as the host CPU, it is still the target CPU that matters.) > > > > > > > > > > > - Details of whether it should be for EPYC only or x86 to be > > > > figured > > > > > > out > > > >
Re: [RFC] eal: RFC to refactor rte_eal_init into sub-functions
On (11/08/23 16:40), Thomas Monjalon wrote: > Date: Wed, 08 Nov 2023 16:40:07 +0100 > From: Thomas Monjalon > To: rahul gupta , Dmitry Kozlyuk > > Cc: Stephen Hemminger , Rahul Gupta > , dev@dpdk.org, > sovar...@linux.microsoft.com, ok...@kernel.org, > sujithsan...@microsoft.com, sowmini.varad...@microsoft.com, Rahul Gupta > > Subject: Re: [RFC] eal: RFC to refactor rte_eal_init into sub-functions > > 08/11/2023 14:53, Dmitry Kozlyuk: > > 2023-11-07 23:03 (UTC+0530), rahul gupta: > > > > > From: Rahul Gupta > > > > > To: dev@dpdk.org, tho...@monjalon.net > > > > > Cc: sovar...@linux.microsoft.com, ok...@kernel.org, > > > > sujithsan...@microsoft.com, sowmini.varad...@microsoft.com, > > > > rahulrgupt...@gmail.com, Rahul Gupta , Rahul > > > > Gupta > > > > > Subject: [RFC] eal: RFC to refactor rte_eal_init into sub-functions > > > > > Date: Thu, 2 Nov 2023 11:19:24 -0700 > > > > > X-Mailer: git-send-email 1.8.3.1 > > > > > > > > > > From: Rahul Gupta > > > > > > > > > > Initialization often requires rte_eal_init + rte_pktmbuf_pool_create > > > > > which can consume a total time of 500-600 ms: > > > > > a) For many devices FLR may take a significant chunk of time > > > > >(200-250 ms in our use-case), this FLR is triggered during device > > > > >probe in rte_eal_init(). > > > > > b) rte_pktmbuf_pool_create() can consume upto 300-350 ms for > > > > > applications that require huge memory. > > > > > > > > > > This cost is incurred on each restart (which happens in our use-case > > > > > during binary updates for servicing). > > > > > This patch provides an optimization using pthreads that appplications > > > > > can use and which can save 200-230ms. > > > > > > > > > > In this patch, rte_eal_init() is refactored into two parts- > > > > > a) 1st part is dependent code ie- it’s a perquisite of the FLR and > > > > >mempool creation. So this code needs to be executed before any > > > > >pthreads. Its named as rte_eal_init_setup() > > > > > b) 2nd part of code is independent code ie- it can execute in parallel > > > > >to mempool creation in a pthread. Its named as > > > > > rte_probe_and_ioctl(). > > > > > > > > > > Existing applications require no changes unless they wish to leverage > > > > > the optimization. > > > > > > > > > > If the application wants to use pthread functionality, it should call- > > > > > a) rte_eal_init_setup() then create two or more pthreads- > > > > > b) in one pthread call- rte_probe_and_ioctl(), > > > > > c) second pthread call- rte_pktmbuf_pool_create() > > > > > d) (optional) Other pthreads for any other independent function. > > > > > > > > > > Signed-off-by: Rahul Gupta > > > > I doubt that the new API is required. > > It is already possible to block all devices from automatic probing > > with EAL options and then probe explicitly in any threads desired. > > At the same time, this RFC shows a valuable optimization pattern, > > so maybe it is worth having in DPDK as an example. > > There are DPDK use cases when probing is completely unnecessary. > > It seems here we want to do the device probing, > but start it in parallel of other tasks. > > > Exposing the initialization process stages makes it harder to refactor > > and requires precise documentation of when and what is initialized > > (for example, in this RFC rte_eal_init_setup() > > does not make service core API usable yet). > > Yes the init order is sensitive, that's why we have a big init function. > But in general I would agree to try splitting it with necessary warnings > and explanations. > > > P. S. You may be also interested in using `--huge-unlink=never` > > to speed rte_pktmbuf_pool_create() during restarts: > > > > https://doc.dpdk.org/guides/linux_gsg/linux_eal_parameters.html#id3 > > Yes good tip :) > > Thank you for the comments. I will send a patch shortly. eal_init_async(); //Internally forks a thread to do FLR. /* Application can do other stuff, including mempool_create, possibly in multiple threads. If threads are forked, then application has to do any needed thread-joins */ eal_init_async_done(); //To sync with FLR thread.
Re: [RFC] eal: RFC to refactor rte_eal_init into sub-functions
On Thu, Nov 09, 2023 at 09:26:27AM -0800, Rahul Gupta wrote: > On (11/08/23 16:40), Thomas Monjalon wrote: > > Date: Wed, 08 Nov 2023 16:40:07 +0100 > > From: Thomas Monjalon > > To: rahul gupta , Dmitry Kozlyuk > > > > Cc: Stephen Hemminger , Rahul Gupta > > , dev@dpdk.org, > > sovar...@linux.microsoft.com, ok...@kernel.org, > > sujithsan...@microsoft.com, sowmini.varad...@microsoft.com, Rahul Gupta > > > > Subject: Re: [RFC] eal: RFC to refactor rte_eal_init into sub-functions > > > > 08/11/2023 14:53, Dmitry Kozlyuk: > > > 2023-11-07 23:03 (UTC+0530), rahul gupta: > > > > > > From: Rahul Gupta > > > > > > To: dev@dpdk.org, tho...@monjalon.net > > > > > > Cc: sovar...@linux.microsoft.com, ok...@kernel.org, > > > > > sujithsan...@microsoft.com, sowmini.varad...@microsoft.com, > > > > > rahulrgupt...@gmail.com, Rahul Gupta , > > > > > Rahul > > > > > Gupta > > > > > > Subject: [RFC] eal: RFC to refactor rte_eal_init into sub-functions > > > > > > Date: Thu, 2 Nov 2023 11:19:24 -0700 > > > > > > X-Mailer: git-send-email 1.8.3.1 > > > > > > > > > > > > From: Rahul Gupta > > > > > > > > > > > > Initialization often requires rte_eal_init + rte_pktmbuf_pool_create > > > > > > which can consume a total time of 500-600 ms: > > > > > > a) For many devices FLR may take a significant chunk of time > > > > > >(200-250 ms in our use-case), this FLR is triggered during device > > > > > >probe in rte_eal_init(). > > > > > > b) rte_pktmbuf_pool_create() can consume upto 300-350 ms for > > > > > > applications that require huge memory. > > > > > > > > > > > > This cost is incurred on each restart (which happens in our use-case > > > > > > during binary updates for servicing). > > > > > > This patch provides an optimization using pthreads that > > > > > > appplications > > > > > > can use and which can save 200-230ms. > > > > > > > > > > > > In this patch, rte_eal_init() is refactored into two parts- > > > > > > a) 1st part is dependent code ie- it’s a perquisite of the FLR and > > > > > >mempool creation. So this code needs to be executed before any > > > > > >pthreads. Its named as rte_eal_init_setup() > > > > > > b) 2nd part of code is independent code ie- it can execute in > > > > > > parallel > > > > > >to mempool creation in a pthread. Its named as > > > > > > rte_probe_and_ioctl(). > > > > > > > > > > > > Existing applications require no changes unless they wish to > > > > > > leverage > > > > > > the optimization. > > > > > > > > > > > > If the application wants to use pthread functionality, it should > > > > > > call- > > > > > > a) rte_eal_init_setup() then create two or more pthreads- > > > > > > b) in one pthread call- rte_probe_and_ioctl(), > > > > > > c) second pthread call- rte_pktmbuf_pool_create() > > > > > > d) (optional) Other pthreads for any other independent function. > > > > > > > > > > > > Signed-off-by: Rahul Gupta > > > > > > I doubt that the new API is required. > > > It is already possible to block all devices from automatic probing > > > with EAL options and then probe explicitly in any threads desired. > > > At the same time, this RFC shows a valuable optimization pattern, > > > so maybe it is worth having in DPDK as an example. > > > There are DPDK use cases when probing is completely unnecessary. > > > > It seems here we want to do the device probing, > > but start it in parallel of other tasks. > > > > > Exposing the initialization process stages makes it harder to refactor > > > and requires precise documentation of when and what is initialized > > > (for example, in this RFC rte_eal_init_setup() > > > does not make service core API usable yet). > > > > Yes the init order is sensitive, that's why we have a big init function. > > But in general I would agree to try splitting it with necessary warnings > > and explanations. > > > > > P. S. You may be also interested in using `--huge-unlink=never` > > > to speed rte_pktmbuf_pool_create() during restarts: > > > > > > https://doc.dpdk.org/guides/linux_gsg/linux_eal_parameters.html#id3 > > > > Yes good tip :) > > > > > Thank you for the comments. I will send a patch shortly. > eal_init_async(); //Internally forks a thread to do FLR. > /* Application can do other stuff, including mempool_create, possibly in >multiple threads. If threads are forked, then application has to do any >needed thread-joins */ > eal_init_async_done(); //To sync with FLR thread. Just to note, the documentation on rte_eal_init_async() needs to call out very explicitly what DPDK APIs, if any, can be called before the call to async_done(). /Bruce
[PATCH v4 0/5] dumpcap and pcapng fixes
This series has bugfixes and tests for dumpcap and pcapng. It should be in 23.11! It fixes issues related to timestamping. The design choices are to maximize performance in the primary process; and do all the time adjustment in the secondary (dumpcap) since the dumpcap needs to system calls anyway to write the result. This patches set changes where the adjustment is calculated into the pcapng portion that opens the output file. All details of the format of timestamp are contained inside pcapng (data hiding). v4 - incorporate review feedback v3 - don't use alloca() since can have VLA type issues Stephen Hemminger (5): pdump: fix setting rte_errno on mp error dumpcap: allow multiple invocations pcapng: modify timestamp calculation pcapng: avoid using alloca() test: cleanups to pcapng test app/dumpcap/main.c | 53 ++--- app/test/meson.build| 2 +- app/test/test_pcapng.c | 418 +++- lib/graph/graph_pcap.c | 2 +- lib/pcapng/rte_pcapng.c | 156 ++- lib/pcapng/rte_pcapng.h | 19 +- lib/pdump/rte_pdump.c | 9 +- 7 files changed, 374 insertions(+), 285 deletions(-) -- 2.39.2
[PATCH v4 1/5] pdump: fix setting rte_errno on mp error
The response from MP server sets err_value to negative on error. The convention for rte_errno is to use a positive value on error. This makes errors like duplicate registration show up with the correct error value. Fixes: 660098d61f57 ("pdump: use generic multi-process channel") Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- lib/pdump/rte_pdump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index 80b90c6f7d03..e94f49e21250 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -564,9 +564,10 @@ pdump_prepare_client_request(const char *device, uint16_t queue, if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) { mp_rep = &mp_reply.msgs[0]; resp = (struct pdump_response *)mp_rep->param; - rte_errno = resp->err_value; - if (!resp->err_value) + if (resp->err_value == 0) ret = 0; + else + rte_errno = -resp->err_value; free(mp_reply.msgs); } -- 2.39.2
[PATCH v4 2/5] dumpcap: allow multiple invocations
If dumpcap is run twice with each instance pointing a different interface, it would fail because of overlap in ring a pool names. Fix by putting process id in the name. It is still not allowed to do multiple invocations on the same interface because only one callback is allowed and only one copy of mbuf is done. Dumpcap will fail with error in this case: pdump_prepare_client_request(): client request for pdump enable/disable failed EAL: Error - exiting with code: 1 Cause: Packet dump enable on 0:net_null0 failed File exists Fixes: cbb44143be74 ("app/dumpcap: add new packet capture application") Reported-by: Isaac Boukris Signed-off-by: Stephen Hemminger --- app/dumpcap/main.c | 28 ++-- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index 64294bbfb3e6..74c754e272c5 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -44,7 +44,6 @@ #include #include -#define RING_NAME "capture-ring" #define MONITOR_INTERVAL (500 * 1000) #define MBUF_POOL_CACHE_SIZE 32 #define BURST_SIZE 32 @@ -647,6 +646,7 @@ static void dpdk_init(void) static struct rte_ring *create_ring(void) { struct rte_ring *ring; + char ring_name[RTE_RING_NAMESIZE]; size_t size, log2; /* Find next power of 2 >= size. */ @@ -660,28 +660,28 @@ static struct rte_ring *create_ring(void) ring_size = size; } - ring = rte_ring_lookup(RING_NAME); - if (ring == NULL) { - ring = rte_ring_create(RING_NAME, ring_size, - rte_socket_id(), 0); - if (ring == NULL) - rte_exit(EXIT_FAILURE, "Could not create ring :%s\n", -rte_strerror(rte_errno)); - } + /* Want one ring per invocation of program */ + snprintf(ring_name, sizeof(ring_name), +"dumpcap-%d", getpid()); + + ring = rte_ring_create(ring_name, ring_size, + rte_socket_id(), 0); + if (ring == NULL) + rte_exit(EXIT_FAILURE, "Could not create ring :%s\n", +rte_strerror(rte_errno)); + return ring; } static struct rte_mempool *create_mempool(void) { const struct interface *intf; - static const char pool_name[] = "capture_mbufs"; + char pool_name[RTE_MEMPOOL_NAMESIZE]; size_t num_mbufs = 2 * ring_size; struct rte_mempool *mp; uint32_t data_size = 128; - mp = rte_mempool_lookup(pool_name); - if (mp) - return mp; + snprintf(pool_name, sizeof(pool_name), "capture_%u", getpid()); /* Common pool so size mbuf for biggest snap length */ TAILQ_FOREACH(intf, &interfaces, next) { @@ -826,7 +826,7 @@ static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) rte_exit(EXIT_FAILURE, "Packet dump enable on %u:%s failed %s\n", intf->port, intf->name, - rte_strerror(-ret)); + rte_strerror(rte_errno)); } if (intf->opts.promisc_mode) { -- 2.39.2
[PATCH v4 3/5] pcapng: modify timestamp calculation
The computation of timestamp is best done in the part of pcapng library that is in secondary process. The secondary process is already doing a bunch of system calls which makes it not performance sensitive. This does change the rte_pcapng_copy() and rte_pcapng_write_stats() experimental API's. Simplify the computation of nanoseconds from TSC to a two step process which avoids numeric overflow issues. The previous code was not thread safe as well. Fixes: c882eb544842 ("pcapng: fix timestamp wrapping in output files") Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- app/dumpcap/main.c | 25 +++-- app/test/test_pcapng.c | 4 +- lib/graph/graph_pcap.c | 2 +- lib/pcapng/rte_pcapng.c | 119 +++- lib/pcapng/rte_pcapng.h | 19 ++- lib/pdump/rte_pdump.c | 4 +- 6 files changed, 61 insertions(+), 112 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index 74c754e272c5..b5770875fab4 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -66,13 +66,13 @@ static bool print_stats; /* capture limit options */ static struct { - uint64_t duration; /* nanoseconds */ + time_t duration; /* seconds */ unsigned long packets; /* number of packets in file */ size_t size;/* file size (bytes) */ } stop; /* Running state */ -static uint64_t start_time, end_time; +static time_t start_time; static uint64_t packets_received; static size_t file_size; @@ -197,7 +197,7 @@ static void auto_stop(char *opt) if (*value == '\0' || *endp != '\0' || interval <= 0) rte_exit(EXIT_FAILURE, "Invalid duration \"%s\"\n", value); - stop.duration = NSEC_PER_SEC * interval; + stop.duration = interval; } else if (strcmp(opt, "filesize") == 0) { stop.size = get_uint(value, "filesize", 0) * 1024; } else if (strcmp(opt, "packets") == 0) { @@ -511,15 +511,6 @@ static void statistics_loop(void) } } -/* Return the time since 1/1/1970 in nanoseconds */ -static uint64_t create_timestamp(void) -{ - struct timespec now; - - clock_gettime(CLOCK_MONOTONIC, &now); - return rte_timespec_to_ns(&now); -} - static void cleanup_pdump_resources(void) { @@ -589,9 +580,8 @@ report_packet_stats(dumpcap_out_t out) ifdrop = pdump_stats.nombuf + pdump_stats.ringfull; if (use_pcapng) - rte_pcapng_write_stats(out.pcapng, intf->port, NULL, - start_time, end_time, - ifrecv, ifdrop); + rte_pcapng_write_stats(out.pcapng, intf->port, + ifrecv, ifdrop, NULL); if (ifrecv == 0) percent = 0; @@ -983,7 +973,7 @@ int main(int argc, char **argv) mp = create_mempool(); out = create_output(); - start_time = create_timestamp(); + start_time = time(NULL); enable_pdump(r, mp); if (!quiet) { @@ -1005,11 +995,10 @@ int main(int argc, char **argv) break; if (stop.duration != 0 && - create_timestamp() - start_time > stop.duration) + time(NULL) - start_time > stop.duration) break; } - end_time = create_timestamp(); disable_primary_monitor(); if (rte_eal_primary_proc_alive(NULL)) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index b8429a02f160..55aa2cf93666 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -173,8 +173,8 @@ test_write_stats(void) ssize_t len; /* write a statistics block */ - len = rte_pcapng_write_stats(pcapng, port_id, -NULL, 0, 0, + len = rte_pcapng_write_stats(pcapng, port_id, NULL, +0, 0, 0, NUM_PACKETS, 0); if (len <= 0) { fprintf(stderr, "Write of statistics failed\n"); diff --git a/lib/graph/graph_pcap.c b/lib/graph/graph_pcap.c index db722c375fa7..89525f1220ca 100644 --- a/lib/graph/graph_pcap.c +++ b/lib/graph/graph_pcap.c @@ -214,7 +214,7 @@ graph_pcap_dispatch(struct rte_graph *graph, mbuf = (struct rte_mbuf *)objs[i]; mc = rte_pcapng_copy(mbuf->port, 0, mbuf, pkt_mp, mbuf->pkt_len, -rte_get_tsc_cycles(), 0, buffer); +0, buffer); if (mc == NULL) break; diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 3c91fc77644a..13fd2b97fb80 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -36,22 +36,14 @@ /* Format of the capture file handle */ struct rte_p
[PATCH v4 4/5] pcapng: avoid using alloca()
The function alloca() like VLA's has problems if the caller passes a large value. Instead use a fixed size buffer (2K) which will be more than sufficient for the info related blocks in the file. Add bounds checks as well. Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- lib/pcapng/rte_pcapng.c | 37 - 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 13fd2b97fb80..f74ec939a9f8 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -33,6 +33,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 100ull +/* upper bound for section, stats and interface blocks */ +#define PCAPNG_BLKSIZ 2048 + /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -140,9 +143,8 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - void *buf; + uint8_t buf[PCAPNG_BLKSIZ]; uint32_t len; - ssize_t cc; len = sizeof(*hdr); if (hw) @@ -158,8 +160,7 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - buf = calloc(1, len); - if (!buf) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_section_header *)buf; @@ -193,10 +194,7 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - cc = write(self->outfd, buf, len); - free(buf); - - return cc; + return write(self->outfd, buf, len); } /* Write an interface block for a DPDK port */ @@ -213,7 +211,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - void *buf; + uint8_t buf[PCAPNG_BLKSIZ]; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -267,8 +265,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, len += pcapng_optlen(0); len += sizeof(uint32_t); - buf = alloca(len); - if (!buf) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_interface_block *)buf; @@ -296,17 +293,16 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE, ifhw, strlen(ifhw)); if (filter) { - /* Encoding is that the first octet indicates string vs BPF */ size_t len; - char *buf; len = strlen(filter) + 1; - buf = alloca(len); - *buf = '\0'; - memcpy(buf + 1, filter, len); + opt->code = PCAPNG_IFB_FILTER; + opt->length = len; + /* Encoding is that the first octet indicates string vs BPF */ + opt->data[0] = 0; + memcpy(opt->data + 1, filter, strlen(filter)); - opt = pcapng_add_option(opt, PCAPNG_IFB_FILTER, - buf, len); + opt = (struct pcapng_option *)((uint8_t *)opt + pcapng_optlen(len)); } opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); @@ -333,7 +329,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint8_t *buf; + uint8_t buf[PCAPNG_BLKSIZ]; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -353,8 +349,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - buf = alloca(len); - if (buf == NULL) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_statistics *)buf; -- 2.39.2
[PATCH v4 5/5] test: cleanups to pcapng test
Overhaul of the pcapng test: - promote it to be a fast test so it gets regularly run. - create null device and use i. - use UDP discard packets that are valid so that for debugging the resulting pcapng file can be looked at with wireshark. - do basic checks on resulting pcap file that lengths and timestamps are in range. - add test for interface options Signed-off-by: Stephen Hemminger --- app/test/meson.build | 2 +- app/test/test_pcapng.c | 418 +++-- 2 files changed, 282 insertions(+), 138 deletions(-) diff --git a/app/test/meson.build b/app/test/meson.build index 4183d66b0e9c..dcc93f4a43b4 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -128,7 +128,7 @@ source_file_deps = { 'test_metrics.c': ['metrics'], 'test_mp_secondary.c': ['hash', 'lpm'], 'test_net_ether.c': ['net'], -'test_pcapng.c': ['ethdev', 'net', 'pcapng'], +'test_pcapng.c': ['ethdev', 'net', 'pcapng', 'bus_vdev'], 'test_pdcp.c': ['eventdev', 'pdcp', 'net', 'timer', 'security'], 'test_pdump.c': ['pdump'] + sample_packet_forward_deps, 'test_per_lcore.c': [], diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 55aa2cf93666..c973aa47d1f8 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -6,25 +6,34 @@ #include #include +#include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include "test.h" -#define NUM_PACKETS10 -#define DUMMY_MBUF_NUM 3 +#define PCAPNG_TEST_DEBUG 0 + +#define TOTAL_PACKETS 4096 +#define MAX_BURST 64 +#define MAX_GAP_US 10 +#define DUMMY_MBUF_NUM 3 -static rte_pcapng_t *pcapng; static struct rte_mempool *mp; static const uint32_t pkt_len = 200; static uint16_t port_id; -static char file_name[] = "/tmp/pcapng_test_XX.pcapng"; +static const char null_dev[] = "net_null0"; /* first mbuf in the packet, should always be at offset 0 */ struct dummy_mbuf { @@ -61,6 +70,7 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) struct { struct rte_ether_hdr eth; struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; } pkt = { .eth = { .dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff", @@ -68,149 +78,201 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) }, .ip = { .version_ihl = RTE_IPV4_VHL_DEF, - .total_length = rte_cpu_to_be_16(plen), - .time_to_live = IPDEFTTL, - .next_proto_id = IPPROTO_RAW, + .time_to_live = 1, + .next_proto_id = IPPROTO_UDP, .src_addr = rte_cpu_to_be_32(RTE_IPV4_LOOPBACK), .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), - } + }, + .udp = { + .dst_port = rte_cpu_to_be_16(9), /* Discard port */ + }, }; memset(dm, 0, sizeof(*dm)); dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, RTE_MIN(sizeof(pkt), plen)); + plen -= sizeof(struct rte_ether_hdr); + + pkt.ip.total_length = rte_cpu_to_be_16(plen); + pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + + plen -= sizeof(struct rte_ipv4_hdr); + pkt.udp.src_port = rte_rand(); + pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + + memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); } static int test_setup(void) { - int tmp_fd; - - port_id = rte_eth_find_next(0); - if (port_id >= RTE_MAX_ETHPORTS) { - fprintf(stderr, "No valid Ether port\n"); - return -1; - } + port_id = rte_eth_dev_count_avail(); - tmp_fd = mkstemps(file_name, strlen(".pcapng")); - if (tmp_fd == -1) { - perror("mkstemps() failure"); - return -1; - } - printf("pcapng: output file %s\n", file_name); - - /* open a test capture file */ - pcapng = rte_pcapng_fdopen(tmp_fd, NULL, NULL, "pcapng_test", NULL); - if (pcapng == NULL) { - fprintf(stderr, "rte_pcapng_fdopen failed\n"); - close(tmp_fd); - return -1; - } - - /* Add interface to the file */ - if (rte_pcapng_add_interface(pcapng, port_id, -NULL, NULL, NULL) != 0) { - fprintf(stderr, "can not add port %u\n", port_id); - return -1; + /* Make a dummy null device to snoop on */ + if (rte_vdev_init(null_dev, NULL) != 0) { + fprintf(stderr, "Failed to create vdev '%s'\n", null_dev); + goto fail; }
[PATCH] event/dlb2: fix missing queue ordering capability flag
The dlb2 driver did not advertise the fact that events could be enqueued to it for any queues, not just those in numerical sequence. Add the missing bit to the capabilities flag returned from the info_get() function. Fixes: d39e23f26e1e ("event/dlb2: fix advertized capabilities") Fixes: e7c9971a857a ("event/dlb2: add probe-time hardware init") Cc: sta...@dpdk.org Signed-off-by: Bruce Richardson --- drivers/event/dlb2/dlb2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c index e645f7595a..050ace0904 100644 --- a/drivers/event/dlb2/dlb2.c +++ b/drivers/event/dlb2/dlb2.c @@ -72,6 +72,7 @@ static struct rte_event_dev_info evdev_dlb2_default_info = { .max_single_link_event_port_queue_pairs = DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2), .event_dev_cap = (RTE_EVENT_DEV_CAP_EVENT_QOS | + RTE_EVENT_DEV_CAP_NONSEQ_MODE | RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES | RTE_EVENT_DEV_CAP_BURST_MODE | -- 2.39.2
[PATCH] net/mlx5: fix use after free on Rx queue start
If RX queue is not started yet, then a mlx5_rxq_obj struct used for storing HW queue objects will be allocated and added to the list held in port's private data structure. After that allocation, Rx queue HW object configuration is done. If that configuration failed, then mlx5_rxq_obj struct is freed, but not removed from the list. This causes an use after free bug, during error handling in mlx5_rxq_start(), where this deallocated struct was accessed during list cleanup. This patch fixes that by inserting mlx5_rxq_obj struct to the list only after HW queue object configuration succeeded. Fixes: 09c2555303be ("net/mlx5: support shared Rx queue") Cc: xuemi...@nvidia.com Cc: sta...@dpdk.org Signed-off-by: Dariusz Sosnowski Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_trigger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 7bdb897612..88dc271a21 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -226,17 +226,17 @@ mlx5_rxq_start(struct rte_eth_dev *dev) if (rxq == NULL) continue; rxq_ctrl = rxq->ctrl; - if (!rxq_ctrl->started) { + if (!rxq_ctrl->started) if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0) goto error; - LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next); - } ret = priv->obj_ops.rxq_obj_new(rxq); if (ret) { mlx5_free(rxq_ctrl->obj); rxq_ctrl->obj = NULL; goto error; } + if (!rxq_ctrl->started) + LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next); rxq_ctrl->started = true; } return 0; -- 2.25.1
[PATCH] net/mlx5: fix unbind of incorrect hairpin queue
Let's take an application with the following configuration: - It uses 2 ports. - Each port has 3 Rx queues and 3 Tx queues. - On each port, Rx queues have a following purposes: - Rx queue 0 - SW queue, - Rx queue 1 - hairpin queue, bound to Tx queue on the same port, - Rx queue 2 - hairpin queue, bound to Tx queue on another port. - On each port, Tx queues have a following purposes: - Tx queue 0 - SW queue, - Tx queue 1 - hairpin queue, bound to Rx queue on the same port, - Tx queue 2 - hairpin queue, bound to Rx queue on another port. - Application configured all of the hairpin queues for manual binding. After ports are configured and queues are set up, if the application does the following API call sequence: 1. rte_eth_dev_start(port_id=0) 2. rte_eth_hairpin_bind(tx_port=0, rx_port=0) 3. rte_eth_hairpin_bind(tx_port=0, rx_port=1) mlx5 PMD fails to modify SQ and logs this error: mlx5_common: mlx5_devx_cmds.c:2079: mlx5_devx_cmd_modify_sq(): Failed to modify SQ using DevX This error was caused by an incorrect unbind operation taken during error handling inside call (3). (3) fails, because port 1 (Rx side of the hairpin) was not started. As a result of this failure, PMD goes into error handling, where all previously bound hairpin queues are unbound. This is incorrect, since this error handling procedure in rte_eth_hairpin_bind() implementation assumes that all hairpin queues are bound to the same rx_port, which is not the case. The following sequence of function calls appears: - rte_eth_hairpin_queue_peer_unbind(rx_port=**1**, rx_queue=1, 0), - mlx5_hairpin_queue_peer_unbind(dev=**port 0**, tx_queue=1, 1). Which violates the hairpin queue destroy flow, by unbinding Tx queue 1 on port 0, before unbinding Rx queue 1 on port 1. This patch fixes that behavior, by filtering Tx queues on which error handling is done to only affect: - hairpin queues (it also reduces unnecessary debug log messages), - hairpin queues connected to the rx_port which is currently processed. Fixes: 37cd4501e873 ("net/mlx5: support two ports hairpin mode") Cc: bi...@nvidia.com Cc: sta...@dpdk.org Signed-off-by: Dariusz Sosnowski Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_trigger.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 88dc271a21..329fa7da3e 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -845,6 +845,11 @@ mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port) txq_ctrl = mlx5_txq_get(dev, i); if (txq_ctrl == NULL) continue; + if (!txq_ctrl->is_hairpin || + txq_ctrl->hairpin_conf.peers[0].port != rx_port) { + mlx5_txq_release(dev, i); + continue; + } rx_queue = txq_ctrl->hairpin_conf.peers[0].queue; rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0); mlx5_hairpin_queue_peer_unbind(dev, i, 1); -- 2.25.1
[PATCH] net/mlx5: fix expected hairpin queue states
This patch fixes the expected SQ and RQ states used in MODIFY_SQ and MODIFY_RQ during unbinding of the hairpin queues. When unbinding the queue objects, they are in RDY state and are transitioning to RST state, instead of going from RST to RST state. Also, this patch fixes the constants used for RQ states. Instead of MLX5_SQC_STATE_*, now MLX5_RQC_STATE_* are used. Fixes: 6a338ad4f7fe ("net/mlx5: add hairpin binding function") Fixes: 37cd4501e873 ("net/mlx5: support two ports hairpin mode") Cc: bi...@nvidia.com Cc: or...@nvidia.com Cc: sta...@dpdk.org Signed-off-by: Dariusz Sosnowski Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_trigger.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 329fa7da3e..6d787a7c93 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -346,8 +346,8 @@ mlx5_hairpin_auto_bind(struct rte_eth_dev *dev) ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr); if (ret) goto error; - rq_attr.state = MLX5_SQC_STATE_RDY; - rq_attr.rq_state = MLX5_SQC_STATE_RST; + rq_attr.state = MLX5_RQC_STATE_RDY; + rq_attr.rq_state = MLX5_RQC_STATE_RST; rq_attr.hairpin_peer_sq = sq->id; rq_attr.hairpin_peer_vhca = priv->sh->cdev->config.hca_attr.vhca_id; @@ -601,8 +601,8 @@ mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue, " mismatch", dev->data->port_id, cur_queue); return -rte_errno; } - rq_attr.state = MLX5_SQC_STATE_RDY; - rq_attr.rq_state = MLX5_SQC_STATE_RST; + rq_attr.state = MLX5_RQC_STATE_RDY; + rq_attr.rq_state = MLX5_RQC_STATE_RST; rq_attr.hairpin_peer_sq = peer_info->qp_id; rq_attr.hairpin_peer_vhca = peer_info->vhca_id; ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr); @@ -666,7 +666,7 @@ mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue, return -rte_errno; } sq_attr.state = MLX5_SQC_STATE_RST; - sq_attr.sq_state = MLX5_SQC_STATE_RST; + sq_attr.sq_state = MLX5_SQC_STATE_RDY; ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr); if (ret == 0) txq_ctrl->hairpin_status = 0; @@ -700,8 +700,8 @@ mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue, dev->data->port_id, cur_queue); return -rte_errno; } - rq_attr.state = MLX5_SQC_STATE_RST; - rq_attr.rq_state = MLX5_SQC_STATE_RST; + rq_attr.state = MLX5_RQC_STATE_RST; + rq_attr.rq_state = MLX5_RQC_STATE_RDY; ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr); if (ret == 0) rxq->hairpin_status = 0; -- 2.25.1
RE: [PATCH v3 2/5] dumpcap: allow multiple invocations
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Thursday, 9 November 2023 18.16 > > On Thu, 9 Nov 2023 08:50:10 +0100 > Morten Brørup wrote: > > > > rte_exit(EXIT_FAILURE, > > > "Packet dump enable on %u:%s failed %s\n", > > > intf->port, intf->name, > > > - rte_strerror(-ret)); > > > + rte_strerror(rte_errno)); > > > > This bugfix (the line above, not the patch itself) supports Tyler's > proposal to standardize on returning -1 with rte_errno set on failure, > instead of some functions returning -errno. Our dual convention for > function return values will cause many bugs like this. > > The error case here is when rte_pdump_enable_bpf() fails. > This is return from pdump_enable in pdump library. > The library does follow the rte_errno convention correctly. I'm sorry about being unclear in my comment about rte_errno conventions; it was not targeted at this library. My comment was meant as general support for Tyler's suggestion, using this as an example of a bug that would not have been there if the return convention was always -1 with rte_errno. With the dual return convention, it's amazing that you caught this bug. > But the error message wasn't reporting correctly which would lead to > confusing error in case where > multiple invocations failed. > > It is not possible to do multiple captures on same interface. And not > worth modifying the > library (would require multiple copies and ref counts) to handle this > case.
Re: [PATCH] app/testpmd: fix indirect action list parameters parsing
Hello Ferruh, Indirect actions list arguments parser was configured to place target number into 64bit value, while the code provided 32bits memory. Hi Gregory, Can you please give more details why 'id' needs to be 64 bits, with callstack or usecase etc? And please describe what is the observed problem with current code? In rte_flow.h, struct rte_flow_action_indirect_list::handle is a pointer. Testpmd ACTION_INDIRECT_LIST_HANDLE and ACTION_INDIRECT_LIST_CONF tokens define arguments size as uintptr_t. On 64 bits system, defining the id variable as 32 bits value corrupted parse_indlst_id2ptr stack. I'll change the id definition to uintptr_t to match token in v2. Regards, Gregory Inside 'parse_indlst_id2ptr()', 'parse_int()' can work or 32bits and 64bits variables, so that one is OK. But both 'port_action_handle_get_by_id()' & 'indirect_action_list_conf_get()' gets 'id' as parameter and they get 32bits argument, when 'id' is 64bit won't it will be cast to 32bits and loose data, should those functions needs to be updated as well. The patch updated variable size for translation results. Fixes: 72a3dec7126f ("ethdev: add indirect flow list action") Signed-off-by: Gregory Etelson --- app/test-pmd/cmdline_flow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 0d521159e9..cf1ca33208 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, struct rte_flow_action *action = ctx->object; struct rte_flow_action_indirect_list *action_conf; const struct indlst_conf *indlst_conf; - uint32_t id; + uint64_t id; int ret; if (!action) @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, action_conf->handle = (typeof(action_conf->handle)) port_action_handle_get_by_id(ctx->port, id); if (!action_conf->handle) { - printf("no indirect list handle for id %u\n", id); + printf("no indirect list handle for id %"PRIu64"\n", +id); return -1; } break;
RE: [PATCH v4 2/5] dumpcap: allow multiple invocations
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Thursday, 9 November 2023 18.34 > > If dumpcap is run twice with each instance pointing a different > interface, it would fail because of overlap in ring a pool names. > Fix by putting process id in the name. > > It is still not allowed to do multiple invocations on the same > interface because only one callback is allowed and only one copy > of mbuf is done. Dumpcap will fail with error in this case: > >pdump_prepare_client_request(): client request for pdump > enable/disable failed >EAL: Error - exiting with code: 1 > Cause: Packet dump enable on 0:net_null0 failed File exists > > Fixes: cbb44143be74 ("app/dumpcap: add new packet capture application") > Reported-by: Isaac Boukris > Signed-off-by: Stephen Hemminger > --- [...] > + snprintf(ring_name, sizeof(ring_name), > + "dumpcap-%d", getpid()); Fixed - thank you. [...] > + snprintf(pool_name, sizeof(pool_name), "capture_%u", getpid()); Should change from %u to %d here too. ;-) Either way, Reviewed-by: Morten Brørup
[PATCH v2] app/testpmd: fix indirect action list parameters parsing
Indirect actions list arguments parser was configured to place target number into 64bit value, while the code provided 32bits memory. The patch updated variable size for translation results. Fixes: 72a3dec7126f ("ethdev: add indirect flow list action") Signed-off-by: Gregory Etelson --- v2: define `id` as uintptr_t --- app/test-pmd/cmdline_flow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 0d521159e9..397f9bc3eb 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, struct rte_flow_action *action = ctx->object; struct rte_flow_action_indirect_list *action_conf; const struct indlst_conf *indlst_conf; - uint32_t id; + uintptr_t id; int ret; if (!action) @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, action_conf->handle = (typeof(action_conf->handle)) port_action_handle_get_by_id(ctx->port, id); if (!action_conf->handle) { - printf("no indirect list handle for id %u\n", id); + printf("no indirect list handle for id %"PRIu64"\n", + id); return -1; } break; -- 2.39.2
[RFC] event/dsw: support explicit release only mode
Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the DSW event device. This feature may be used by an EAL thread to pull more work from the work scheduler, without giving up the option to forward events originating from a previous dequeue batch. This in turn allows an EAL thread to be productive while waiting for a hardware accelerator to complete some operation. Prior to this change, DSW didn't make any distinction between RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that new events would be backpressured earlier. After this change, DSW tracks the number of released events (i.e., events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELASE) that has been enqueued. To reduce overhead, DSW does not track the *identity* of individual events. This in turn implies that a certain stage in the flow migration process, DSW must wait for all pending releases (on the migration source port, only) to be received from the application, to assure that no event pertaining to any of the to-be-migrated flows are being processed. With this change, DSW starts making a distinction between forward and new type events for credit allocation purposes. Only new events needs credits. All events marked as RTE_EVENT_OP_FORWARD must have a corresponding dequeued event from a previous dequeue batch. Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains unaffected by this change. A side-effect of the tweaked DSW migration logic is that the migration latency is reduced, regardless if implicit relase is enabled or not. Signed-off-by: Mattias Rönnblom --- drivers/event/dsw/dsw_evdev.c | 8 +++- drivers/event/dsw/dsw_evdev.h | 3 ++ drivers/event/dsw/dsw_event.c | 84 ++- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c index 1209e73a9d..445f3ac357 100644 --- a/drivers/event/dsw/dsw_evdev.c +++ b/drivers/event/dsw/dsw_evdev.c @@ -23,15 +23,20 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id, struct rte_event_ring *in_ring; struct rte_ring *ctl_in_ring; char ring_name[RTE_RING_NAMESIZE]; + bool implicit_release; port = &dsw->ports[port_id]; + implicit_release = + !(conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL); + *port = (struct dsw_port) { .id = port_id, .dsw = dsw, .dequeue_depth = conf->dequeue_depth, .enqueue_depth = conf->enqueue_depth, - .new_event_threshold = conf->new_event_threshold + .new_event_threshold = conf->new_event_threshold, + .implicit_release = implicit_release }; snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id, @@ -221,6 +226,7 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused, .max_profiles_per_port = 1, .event_dev_cap = RTE_EVENT_DEV_CAP_BURST_MODE| RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED| + RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE| RTE_EVENT_DEV_CAP_NONSEQ_MODE| RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT| RTE_EVENT_DEV_CAP_CARRY_FLOW_ID diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h index 6416a8a898..a245a8940e 100644 --- a/drivers/event/dsw/dsw_evdev.h +++ b/drivers/event/dsw/dsw_evdev.h @@ -128,6 +128,7 @@ struct dsw_queue_flow { enum dsw_migration_state { DSW_MIGRATION_STATE_IDLE, DSW_MIGRATION_STATE_PAUSING, + DSW_MIGRATION_STATE_FINISH_PENDING, DSW_MIGRATION_STATE_UNPAUSING }; @@ -148,6 +149,8 @@ struct dsw_port { int32_t new_event_threshold; + bool implicit_release; + uint16_t pending_releases; uint16_t next_parallel_flow_id; diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c index 93bbeead2e..c70e50dd16 100644 --- a/drivers/event/dsw/dsw_event.c +++ b/drivers/event/dsw/dsw_event.c @@ -1141,6 +1141,15 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw, source_port->migration_state = DSW_MIGRATION_STATE_UNPAUSING; } +static void +dsw_port_try_finish_pending(struct dsw_evdev *dsw, struct dsw_port *source_port) +{ + if (unlikely(source_port->migration_state == +DSW_MIGRATION_STATE_FINISH_PENDING && +source_port->pending_releases == 0)) + dsw_port_move_emigrating_flows(dsw, source_port); +} + static void dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port) { @@ -1149,14 +1158,15 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port) if (port->cfm_cnt == (dsw->num_ports-1)) { switch (port->migration_state) { case DSW_MIGRATION_STATE_PAUSING: - dsw_port_move_emigrating_flows(dsw, port); + port->mig
Re: [PATCH 0/5] net/hns3: fix and refactor mailbox code
On 11/8/2023 3:44 AM, Jie Hai wrote: > This patchset fixes failure on sync mailbox and refactors some codes on > mailbox. > > Dengdui Huang (5): > net/hns3: fix sync mailbox failure forever > net/hns3: refactor VF mailbox message struct > net/hns3: refactor PF mailbox message struct > net/hns3: refactor send mailbox function > net/hns3: refactor handle mailbox function > Hi Jie, Overall patchset looks good with minor issue below [1], but this set has high impact and not solving a critical defect etc, but mainly refactoring. We are very close to the release, there won't be enough time to fix any issue caused by this refactoring. My suggestion is to postpone the refactoring to next release, maybe get only the first fix patch in this release, what do you think? [1] Can you please fix the checkpatch warning: ### [PATCH] net/hns3: refactor handle mailbox function Warning in drivers/net/hns3/hns3_mbx.c: Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx
Re: [PATCH 2/5] net/hns3: refactor VF mailbox message struct
On 11/8/2023 3:44 AM, Jie Hai wrote: > From: Dengdui Huang > > The data region in VF to PF mbx memssage command is > s/memssage/message/ Same for next patch > used to communicate with PF driver. And this data > region exists as an array. As a result, some complicated > feature commands, like setting promisc mode, map/unmap > ring vector and setting VLAN id, have to use magic number > to set them. This isn't good for maintenance of driver. > So this patch refactors these messages by extracting an > hns3_vf_to_pf_msg structure. > > In addition, the PF link change event message is reported > by the firmware and is reported in hns3_mbx_vf_to_pf_cmd > format, it also needs to be modified. > > Fixes: 463e748964f5 ("net/hns3: support mailbox") > Cc: sta...@dpdk.org > > Signed-off-by: Dengdui Huang > Signed-off-by: Jie Hai <...>
Re: [PATCH] app/testpmd: fix indirect action list parameters parsing
On 11/9/2023 6:22 PM, Etelson, Gregory wrote: > Hello Ferruh, > >>> Indirect actions list arguments parser was configured to place target >>> number into 64bit value, while the code provided 32bits memory. >>> >> >> Hi Gregory, >> >> Can you please give more details why 'id' needs to be 64 bits, with >> callstack or usecase etc? >> And please describe what is the observed problem with current code? >> > > In rte_flow.h, struct rte_flow_action_indirect_list::handle is a pointer. > > Testpmd ACTION_INDIRECT_LIST_HANDLE and ACTION_INDIRECT_LIST_CONF tokens > define arguments size as uintptr_t. > > On 64 bits system, defining the id variable as 32 bits value > corrupted parse_indlst_id2ptr stack. > I can't see how stack corruption can happen, can you please provide call stack and flow command? > I'll change the id definition to uintptr_t to match token in v2. > > Regards, > Gregory > >> >> Inside 'parse_indlst_id2ptr()', >> 'parse_int()' can work or 32bits and 64bits variables, so that one is OK. >> But both 'port_action_handle_get_by_id()' & >> 'indirect_action_list_conf_get()' gets 'id' as parameter and they get >> 32bits argument, when 'id' is 64bit won't it will be cast to 32bits and >> loose data, should those functions needs to be updated as well. >> Can you please reply to above question, about changing 'id' type impact to other functions using it? >> >> >>> The patch updated variable size for translation results. >>> >>> Fixes: 72a3dec7126f ("ethdev: add indirect flow list action") >>> Signed-off-by: Gregory Etelson >>> --- >>> app/test-pmd/cmdline_flow.c | 5 +++-- >>> 1 file changed, 3 insertions(+), 2 deletions(-) >>> >>> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c >>> index 0d521159e9..cf1ca33208 100644 >>> --- a/app/test-pmd/cmdline_flow.c >>> +++ b/app/test-pmd/cmdline_flow.c >>> @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, >>> const struct token *token, >>> struct rte_flow_action *action = ctx->object; >>> struct rte_flow_action_indirect_list *action_conf; >>> const struct indlst_conf *indlst_conf; >>> - uint32_t id; >>> + uint64_t id; >>> int ret; >>> >>> if (!action) >>> @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, >>> const struct token *token, >>> action_conf->handle = (typeof(action_conf->handle)) >>> port_action_handle_get_by_id(ctx->port, >>> id); >>> if (!action_conf->handle) { >>> - printf("no indirect list handle for id %u\n", id); >>> + printf("no indirect list handle for id >>> %"PRIu64"\n", >>> + id); >>> return -1; >>> } >>> break; >> >>
Re: [PATCH v2] app/testpmd: fix indirect action list parameters parsing
On Thu, 9 Nov 2023 20:36:37 +0200 Gregory Etelson wrote: > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c > index 0d521159e9..397f9bc3eb 100644 > --- a/app/test-pmd/cmdline_flow.c > +++ b/app/test-pmd/cmdline_flow.c > @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const struct > token *token, > struct rte_flow_action *action = ctx->object; > struct rte_flow_action_indirect_list *action_conf; > const struct indlst_conf *indlst_conf; > - uint32_t id; > + uintptr_t id; > int ret; > > if (!action) > @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const struct > token *token, > action_conf->handle = (typeof(action_conf->handle)) > port_action_handle_get_by_id(ctx->port, id); > if (!action_conf->handle) { > - printf("no indirect list handle for id %u\n", id); > + printf("no indirect list handle for id %"PRIu64"\n", > +id); On 32 bit platforms uintptr_t is 32 bits. Uintptr_t is always a typedef for unsigned long so use %lu here instead.
[PATCH v5 1/5] pdump: fix setting rte_errno on mp error
The response from MP server sets err_value to negative on error. The convention for rte_errno is to use a positive value on error. This makes errors like duplicate registration show up with the correct error value. Fixes: 660098d61f57 ("pdump: use generic multi-process channel") Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- lib/pdump/rte_pdump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index 80b90c6f7d03..e94f49e21250 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -564,9 +564,10 @@ pdump_prepare_client_request(const char *device, uint16_t queue, if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) { mp_rep = &mp_reply.msgs[0]; resp = (struct pdump_response *)mp_rep->param; - rte_errno = resp->err_value; - if (!resp->err_value) + if (resp->err_value == 0) ret = 0; + else + rte_errno = -resp->err_value; free(mp_reply.msgs); } -- 2.39.2
[PATCH v5 0/5] dumpcap and pcapng fixes
This series has bugfixes and tests for dumpcap and pcapng. It should be in 23.11! It fixes issues related to timestamping. The design choices are to maximize performance in the primary process; and do all the time adjustment in the secondary (dumpcap) since the dumpcap needs to system calls anyway to write the result. This patches set changes where the adjustment is calculated into the pcapng portion that opens the output file. All details of the format of timestamp are contained inside pcapng (data hiding). v5 - fix format of getpid in capture name v4 - incorporate review feedback v3 - don't use alloca() since can have VLA type issues Stephen Hemminger (5): pdump: fix setting rte_errno on mp error dumpcap: allow multiple invocations pcapng: modify timestamp calculation pcapng: avoid using alloca() test: cleanups to pcapng test app/dumpcap/main.c | 53 ++--- app/test/meson.build| 2 +- app/test/test_pcapng.c | 418 +++- lib/graph/graph_pcap.c | 2 +- lib/pcapng/rte_pcapng.c | 156 ++- lib/pcapng/rte_pcapng.h | 19 +- lib/pdump/rte_pdump.c | 9 +- 7 files changed, 374 insertions(+), 285 deletions(-) -- 2.39.2
[PATCH v5 2/5] dumpcap: allow multiple invocations
If dumpcap is run twice with each instance pointing a different interface, it would fail because of overlap in ring a pool names. Fix by putting process id in the name. It is still not allowed to do multiple invocations on the same interface because only one callback is allowed and only one copy of mbuf is done. Dumpcap will fail with error in this case: pdump_prepare_client_request(): client request for pdump enable/disable failed EAL: Error - exiting with code: 1 Cause: Packet dump enable on 0:net_null0 failed File exists Fixes: cbb44143be74 ("app/dumpcap: add new packet capture application") Reported-by: Isaac Boukris Signed-off-by: Stephen Hemminger --- app/dumpcap/main.c | 28 ++-- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index 64294bbfb3e6..efc60372d718 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -44,7 +44,6 @@ #include #include -#define RING_NAME "capture-ring" #define MONITOR_INTERVAL (500 * 1000) #define MBUF_POOL_CACHE_SIZE 32 #define BURST_SIZE 32 @@ -647,6 +646,7 @@ static void dpdk_init(void) static struct rte_ring *create_ring(void) { struct rte_ring *ring; + char ring_name[RTE_RING_NAMESIZE]; size_t size, log2; /* Find next power of 2 >= size. */ @@ -660,28 +660,28 @@ static struct rte_ring *create_ring(void) ring_size = size; } - ring = rte_ring_lookup(RING_NAME); - if (ring == NULL) { - ring = rte_ring_create(RING_NAME, ring_size, - rte_socket_id(), 0); - if (ring == NULL) - rte_exit(EXIT_FAILURE, "Could not create ring :%s\n", -rte_strerror(rte_errno)); - } + /* Want one ring per invocation of program */ + snprintf(ring_name, sizeof(ring_name), +"dumpcap-%d", getpid()); + + ring = rte_ring_create(ring_name, ring_size, + rte_socket_id(), 0); + if (ring == NULL) + rte_exit(EXIT_FAILURE, "Could not create ring :%s\n", +rte_strerror(rte_errno)); + return ring; } static struct rte_mempool *create_mempool(void) { const struct interface *intf; - static const char pool_name[] = "capture_mbufs"; + char pool_name[RTE_MEMPOOL_NAMESIZE]; size_t num_mbufs = 2 * ring_size; struct rte_mempool *mp; uint32_t data_size = 128; - mp = rte_mempool_lookup(pool_name); - if (mp) - return mp; + snprintf(pool_name, sizeof(pool_name), "capture_%d", getpid()); /* Common pool so size mbuf for biggest snap length */ TAILQ_FOREACH(intf, &interfaces, next) { @@ -826,7 +826,7 @@ static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) rte_exit(EXIT_FAILURE, "Packet dump enable on %u:%s failed %s\n", intf->port, intf->name, - rte_strerror(-ret)); + rte_strerror(rte_errno)); } if (intf->opts.promisc_mode) { -- 2.39.2
[PATCH v5 3/5] pcapng: modify timestamp calculation
The computation of timestamp is best done in the part of pcapng library that is in secondary process. The secondary process is already doing a bunch of system calls which makes it not performance sensitive. This does change the rte_pcapng_copy() and rte_pcapng_write_stats() experimental API's. Simplify the computation of nanoseconds from TSC to a two step process which avoids numeric overflow issues. The previous code was not thread safe as well. Fixes: c882eb544842 ("pcapng: fix timestamp wrapping in output files") Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- app/dumpcap/main.c | 25 +++-- app/test/test_pcapng.c | 4 +- lib/graph/graph_pcap.c | 2 +- lib/pcapng/rte_pcapng.c | 119 +++- lib/pcapng/rte_pcapng.h | 19 ++- lib/pdump/rte_pdump.c | 4 +- 6 files changed, 61 insertions(+), 112 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index efc60372d718..583bce80166c 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -66,13 +66,13 @@ static bool print_stats; /* capture limit options */ static struct { - uint64_t duration; /* nanoseconds */ + time_t duration; /* seconds */ unsigned long packets; /* number of packets in file */ size_t size;/* file size (bytes) */ } stop; /* Running state */ -static uint64_t start_time, end_time; +static time_t start_time; static uint64_t packets_received; static size_t file_size; @@ -197,7 +197,7 @@ static void auto_stop(char *opt) if (*value == '\0' || *endp != '\0' || interval <= 0) rte_exit(EXIT_FAILURE, "Invalid duration \"%s\"\n", value); - stop.duration = NSEC_PER_SEC * interval; + stop.duration = interval; } else if (strcmp(opt, "filesize") == 0) { stop.size = get_uint(value, "filesize", 0) * 1024; } else if (strcmp(opt, "packets") == 0) { @@ -511,15 +511,6 @@ static void statistics_loop(void) } } -/* Return the time since 1/1/1970 in nanoseconds */ -static uint64_t create_timestamp(void) -{ - struct timespec now; - - clock_gettime(CLOCK_MONOTONIC, &now); - return rte_timespec_to_ns(&now); -} - static void cleanup_pdump_resources(void) { @@ -589,9 +580,8 @@ report_packet_stats(dumpcap_out_t out) ifdrop = pdump_stats.nombuf + pdump_stats.ringfull; if (use_pcapng) - rte_pcapng_write_stats(out.pcapng, intf->port, NULL, - start_time, end_time, - ifrecv, ifdrop); + rte_pcapng_write_stats(out.pcapng, intf->port, + ifrecv, ifdrop, NULL); if (ifrecv == 0) percent = 0; @@ -983,7 +973,7 @@ int main(int argc, char **argv) mp = create_mempool(); out = create_output(); - start_time = create_timestamp(); + start_time = time(NULL); enable_pdump(r, mp); if (!quiet) { @@ -1005,11 +995,10 @@ int main(int argc, char **argv) break; if (stop.duration != 0 && - create_timestamp() - start_time > stop.duration) + time(NULL) - start_time > stop.duration) break; } - end_time = create_timestamp(); disable_primary_monitor(); if (rte_eal_primary_proc_alive(NULL)) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index b8429a02f160..55aa2cf93666 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -173,8 +173,8 @@ test_write_stats(void) ssize_t len; /* write a statistics block */ - len = rte_pcapng_write_stats(pcapng, port_id, -NULL, 0, 0, + len = rte_pcapng_write_stats(pcapng, port_id, NULL, +0, 0, 0, NUM_PACKETS, 0); if (len <= 0) { fprintf(stderr, "Write of statistics failed\n"); diff --git a/lib/graph/graph_pcap.c b/lib/graph/graph_pcap.c index db722c375fa7..89525f1220ca 100644 --- a/lib/graph/graph_pcap.c +++ b/lib/graph/graph_pcap.c @@ -214,7 +214,7 @@ graph_pcap_dispatch(struct rte_graph *graph, mbuf = (struct rte_mbuf *)objs[i]; mc = rte_pcapng_copy(mbuf->port, 0, mbuf, pkt_mp, mbuf->pkt_len, -rte_get_tsc_cycles(), 0, buffer); +0, buffer); if (mc == NULL) break; diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 3c91fc77644a..13fd2b97fb80 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -36,22 +36,14 @@ /* Format of the capture file handle */ struct rte_p
[PATCH v5 4/5] pcapng: avoid using alloca()
The function alloca() like VLA's has problems if the caller passes a large value. Instead use a fixed size buffer (2K) which will be more than sufficient for the info related blocks in the file. Add bounds checks as well. Signed-off-by: Stephen Hemminger Acked-by: Morten Brørup --- lib/pcapng/rte_pcapng.c | 37 - 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 13fd2b97fb80..f74ec939a9f8 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -33,6 +33,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 100ull +/* upper bound for section, stats and interface blocks */ +#define PCAPNG_BLKSIZ 2048 + /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -140,9 +143,8 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - void *buf; + uint8_t buf[PCAPNG_BLKSIZ]; uint32_t len; - ssize_t cc; len = sizeof(*hdr); if (hw) @@ -158,8 +160,7 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - buf = calloc(1, len); - if (!buf) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_section_header *)buf; @@ -193,10 +194,7 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - cc = write(self->outfd, buf, len); - free(buf); - - return cc; + return write(self->outfd, buf, len); } /* Write an interface block for a DPDK port */ @@ -213,7 +211,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - void *buf; + uint8_t buf[PCAPNG_BLKSIZ]; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -267,8 +265,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, len += pcapng_optlen(0); len += sizeof(uint32_t); - buf = alloca(len); - if (!buf) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_interface_block *)buf; @@ -296,17 +293,16 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE, ifhw, strlen(ifhw)); if (filter) { - /* Encoding is that the first octet indicates string vs BPF */ size_t len; - char *buf; len = strlen(filter) + 1; - buf = alloca(len); - *buf = '\0'; - memcpy(buf + 1, filter, len); + opt->code = PCAPNG_IFB_FILTER; + opt->length = len; + /* Encoding is that the first octet indicates string vs BPF */ + opt->data[0] = 0; + memcpy(opt->data + 1, filter, strlen(filter)); - opt = pcapng_add_option(opt, PCAPNG_IFB_FILTER, - buf, len); + opt = (struct pcapng_option *)((uint8_t *)opt + pcapng_optlen(len)); } opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); @@ -333,7 +329,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint8_t *buf; + uint8_t buf[PCAPNG_BLKSIZ]; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -353,8 +349,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - buf = alloca(len); - if (buf == NULL) + if (len > sizeof(buf)) return -1; hdr = (struct pcapng_statistics *)buf; -- 2.39.2
[PATCH v5 5/5] test: cleanups to pcapng test
Overhaul of the pcapng test: - promote it to be a fast test so it gets regularly run. - create null device and use i. - use UDP discard packets that are valid so that for debugging the resulting pcapng file can be looked at with wireshark. - do basic checks on resulting pcap file that lengths and timestamps are in range. - add test for interface options Signed-off-by: Stephen Hemminger --- app/test/meson.build | 2 +- app/test/test_pcapng.c | 418 +++-- 2 files changed, 282 insertions(+), 138 deletions(-) diff --git a/app/test/meson.build b/app/test/meson.build index 4183d66b0e9c..dcc93f4a43b4 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -128,7 +128,7 @@ source_file_deps = { 'test_metrics.c': ['metrics'], 'test_mp_secondary.c': ['hash', 'lpm'], 'test_net_ether.c': ['net'], -'test_pcapng.c': ['ethdev', 'net', 'pcapng'], +'test_pcapng.c': ['ethdev', 'net', 'pcapng', 'bus_vdev'], 'test_pdcp.c': ['eventdev', 'pdcp', 'net', 'timer', 'security'], 'test_pdump.c': ['pdump'] + sample_packet_forward_deps, 'test_per_lcore.c': [], diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 55aa2cf93666..c973aa47d1f8 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -6,25 +6,34 @@ #include #include +#include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include "test.h" -#define NUM_PACKETS10 -#define DUMMY_MBUF_NUM 3 +#define PCAPNG_TEST_DEBUG 0 + +#define TOTAL_PACKETS 4096 +#define MAX_BURST 64 +#define MAX_GAP_US 10 +#define DUMMY_MBUF_NUM 3 -static rte_pcapng_t *pcapng; static struct rte_mempool *mp; static const uint32_t pkt_len = 200; static uint16_t port_id; -static char file_name[] = "/tmp/pcapng_test_XX.pcapng"; +static const char null_dev[] = "net_null0"; /* first mbuf in the packet, should always be at offset 0 */ struct dummy_mbuf { @@ -61,6 +70,7 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) struct { struct rte_ether_hdr eth; struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; } pkt = { .eth = { .dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff", @@ -68,149 +78,201 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) }, .ip = { .version_ihl = RTE_IPV4_VHL_DEF, - .total_length = rte_cpu_to_be_16(plen), - .time_to_live = IPDEFTTL, - .next_proto_id = IPPROTO_RAW, + .time_to_live = 1, + .next_proto_id = IPPROTO_UDP, .src_addr = rte_cpu_to_be_32(RTE_IPV4_LOOPBACK), .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), - } + }, + .udp = { + .dst_port = rte_cpu_to_be_16(9), /* Discard port */ + }, }; memset(dm, 0, sizeof(*dm)); dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, RTE_MIN(sizeof(pkt), plen)); + plen -= sizeof(struct rte_ether_hdr); + + pkt.ip.total_length = rte_cpu_to_be_16(plen); + pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + + plen -= sizeof(struct rte_ipv4_hdr); + pkt.udp.src_port = rte_rand(); + pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + + memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); } static int test_setup(void) { - int tmp_fd; - - port_id = rte_eth_find_next(0); - if (port_id >= RTE_MAX_ETHPORTS) { - fprintf(stderr, "No valid Ether port\n"); - return -1; - } + port_id = rte_eth_dev_count_avail(); - tmp_fd = mkstemps(file_name, strlen(".pcapng")); - if (tmp_fd == -1) { - perror("mkstemps() failure"); - return -1; - } - printf("pcapng: output file %s\n", file_name); - - /* open a test capture file */ - pcapng = rte_pcapng_fdopen(tmp_fd, NULL, NULL, "pcapng_test", NULL); - if (pcapng == NULL) { - fprintf(stderr, "rte_pcapng_fdopen failed\n"); - close(tmp_fd); - return -1; - } - - /* Add interface to the file */ - if (rte_pcapng_add_interface(pcapng, port_id, -NULL, NULL, NULL) != 0) { - fprintf(stderr, "can not add port %u\n", port_id); - return -1; + /* Make a dummy null device to snoop on */ + if (rte_vdev_init(null_dev, NULL) != 0) { + fprintf(stderr, "Failed to create vdev '%s'\n", null_dev); + goto fail; }
Re: [PATCH] app/testpmd: fix indirect action list parameters parsing
Hello Ferruh, Indirect actions list arguments parser was configured to place target number into 64bit value, while the code provided 32bits memory. Hi Gregory, Can you please give more details why 'id' needs to be 64 bits, with callstack or usecase etc? And please describe what is the observed problem with current code? In rte_flow.h, struct rte_flow_action_indirect_list::handle is a pointer. Testpmd ACTION_INDIRECT_LIST_HANDLE and ACTION_INDIRECT_LIST_CONF tokens define arguments size as uintptr_t. On 64 bits system, defining the id variable as 32 bits value corrupted parse_indlst_id2ptr stack. I can't see how stack corruption can happen, can you please provide call stack and flow command? To reproduce the crash buildtype must be release or debugoptimized. The crash will not reproduce with the debug builds. Testpmd commands I use: dpdk-testpmd -a ${PCI_ADDR},dv_flow_en=2,representor=vf0-1 -- -i port stop all flow configure 0 queues_number 12 queues_size 256 flow configure 1 queues_number 12 queues_size 256 flow configure 2 queues_number 12 queues_size 256 port start all start set raw_encap 0 eth dst is 00:16:3e:52:bd:37 src is 00:16:3e:6e:16:e0 type is 2048 has_vlan is 0 / ipv4 src is 110.240.52.255 dst is 189.68.183.147 proto is 17 fragment_offset is 0 packet_id is 1 tos is 102 ttl is 189 version_ihl is 69 / udp src is 56800 dst is 4789 / vxlan vni is 3 / end_set set sample_actions 0 represented_port ethdev_port_id 0 / end flow indirect_action 0 create action_id 5 transfer list actions sample ratio 1 index 0 / represented_port ethdev_port_id 2 / end flow actions_template 0 create transfer actions_template_id 6 template indirect_list handle 5 / end mask indirect_list handle 5 / end Result: *** stack smashing detected ***: terminated The corruption occurred in `parse_int()` called from `parse_indlst_id2ptr()`. Inside `parse_int()` the arg parameter referenced 8 bytes of memory while the target buffer was 4 bytes allocated on caller optimized stack: (gdb) p *arg $1 = { ... size = 8, ...} Inside 'parse_indlst_id2ptr()', 'parse_int()' can work or 32bits and 64bits variables, so that one is OK. But both 'port_action_handle_get_by_id()' & 'indirect_action_list_conf_get()' gets 'id' as parameter and they get 32bits argument, when 'id' is 64bit won't it will be cast to 32bits and loose data, should those functions needs to be updated as well. Can you please reply to above question, about changing 'id' type impact to other functions using it? I've missed that. Need to re-think. Regards, Gregory The patch updated variable size for translation results. Fixes: 72a3dec7126f ("ethdev: add indirect flow list action") Signed-off-by: Gregory Etelson --- app/test-pmd/cmdline_flow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 0d521159e9..cf1ca33208 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, struct rte_flow_action *action = ctx->object; struct rte_flow_action_indirect_list *action_conf; const struct indlst_conf *indlst_conf; - uint32_t id; + uint64_t id; int ret; if (!action) @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const struct token *token, action_conf->handle = (typeof(action_conf->handle)) port_action_handle_get_by_id(ctx->port, id); if (!action_conf->handle) { - printf("no indirect list handle for id %u\n", id); + printf("no indirect list handle for id %"PRIu64"\n", +id); return -1; } break;
Re: [PATCH v2] app/testpmd: fix indirect action list parameters parsing
On Thu, Nov 09, 2023 at 11:41:37AM -0800, Stephen Hemminger wrote: > On Thu, 9 Nov 2023 20:36:37 +0200 > Gregory Etelson wrote: > > > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c > > index 0d521159e9..397f9bc3eb 100644 > > --- a/app/test-pmd/cmdline_flow.c > > +++ b/app/test-pmd/cmdline_flow.c > > @@ -11331,7 +11331,7 @@ parse_indlst_id2ptr(struct context *ctx, const > > struct token *token, > > struct rte_flow_action *action = ctx->object; > > struct rte_flow_action_indirect_list *action_conf; > > const struct indlst_conf *indlst_conf; > > - uint32_t id; > > + uintptr_t id; > > int ret; > > > > if (!action) > > @@ -11350,7 +11350,8 @@ parse_indlst_id2ptr(struct context *ctx, const > > struct token *token, > > action_conf->handle = (typeof(action_conf->handle)) > > port_action_handle_get_by_id(ctx->port, id); > > if (!action_conf->handle) { > > - printf("no indirect list handle for id %u\n", id); > > + printf("no indirect list handle for id %"PRIu64"\n", > > + id); > > On 32 bit platforms uintptr_t is 32 bits. > Uintptr_t is always a typedef for unsigned long so use %lu here instead. PRIuPTR(and PRIxPTR) is the corresponding define from inttypes.h. /Bruce
RE: [PATCH v5 2/5] dumpcap: allow multiple invocations
> From: Stephen Hemminger [mailto:step...@networkplumber.org] > Sent: Thursday, 9 November 2023 20.46 > > If dumpcap is run twice with each instance pointing a different > interface, it would fail because of overlap in ring a pool names. > Fix by putting process id in the name. > > It is still not allowed to do multiple invocations on the same > interface because only one callback is allowed and only one copy > of mbuf is done. Dumpcap will fail with error in this case: > >pdump_prepare_client_request(): client request for pdump > enable/disable failed >EAL: Error - exiting with code: 1 > Cause: Packet dump enable on 0:net_null0 failed File exists > > Fixes: cbb44143be74 ("app/dumpcap: add new packet capture application") > Reported-by: Isaac Boukris > Signed-off-by: Stephen Hemminger > --- Reviewed-by: Morten Brørup
RE: [EXT] [PATCH 0/2] remove unnecessary null checks in OpenSSL usage
> OpenSSL follows null free conventions > > Stephen Hemminger (2): > nullfree: add matches for null free cases from OpenSSL > crypto/openssl: remove unnecessary NULL checks before free > > devtools/cocci/nullfree.cocci| 9 + > drivers/crypto/openssl/rte_openssl_pmd.c | 36 +++- > drivers/crypto/openssl/rte_openssl_pmd_ops.c | 3 +- > 3 files changed, 22 insertions(+), 26 deletions(-) Series Applied to dpdk-next-crypto Thanks.
[PATCH] ethdev: add extension keyword to statement expression macro
add missing __extension__ keyword to macros using gcc statement expression extension. Signed-off-by: Tyler Retzlaff --- lib/ethdev/rte_mtr.c | 10 +- lib/ethdev/rte_tm.c | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/ethdev/rte_mtr.c b/lib/ethdev/rte_mtr.c index 4e94af9..900837b 100644 --- a/lib/ethdev/rte_mtr.c +++ b/lib/ethdev/rte_mtr.c @@ -41,14 +41,14 @@ } #define RTE_MTR_FUNC(port_id, func)\ -({ \ +__extension__ ({ \ const struct rte_mtr_ops *ops = \ - rte_mtr_ops_get(port_id, error);\ - if (ops == NULL)\ + rte_mtr_ops_get(port_id, error);\ + if (ops == NULL)\ return -rte_errno; \ \ if (ops->func == NULL) \ - return -rte_mtr_error_set(error,\ + return -rte_mtr_error_set(error,\ ENOSYS, \ RTE_MTR_ERROR_TYPE_UNSPECIFIED, \ NULL, \ @@ -58,7 +58,7 @@ }) #define RTE_MTR_HNDL_FUNC(port_id, func) \ -({ \ +__extension__ ({ \ const struct rte_mtr_ops *ops = \ rte_mtr_ops_get(port_id, error);\ if (ops == NULL)\ diff --git a/lib/ethdev/rte_tm.c b/lib/ethdev/rte_tm.c index 2d08141..d594fe0 100644 --- a/lib/ethdev/rte_tm.c +++ b/lib/ethdev/rte_tm.c @@ -40,11 +40,11 @@ return ops; } -#define RTE_TM_FUNC(port_id, func) \ -({ \ +#define RTE_TM_FUNC(port_id, func) \ +__extension__ ({ \ const struct rte_tm_ops *ops = \ rte_tm_ops_get(port_id, error); \ - if (ops == NULL)\ + if (ops == NULL)\ return -rte_errno; \ \ if (ops->func == NULL) \ -- 1.8.3.1
RE: [PATCH v3] crypto/openssl: fix memory leaks in asym ops
> Subject: [PATCH v3] crypto/openssl: fix memory leaks in asym ops > > Fix memory leaks in Asymmetric ops, as reported by valgrind. > > Signed-off-by: Gowrishankar Muthukrishnan > --- > v3: > - changes as suggested in v2. The patch conflicts with Stephen's patch to remove the null checks before free. Please rebase. Stephen's patch is merged.
RE: [PATCH v2] test/cryptodev: add ECDH tests
> Subject: [PATCH v2] test/cryptodev: add ECDH tests > > Add ECDH tests. > > Signed-off-by: Gowrishankar Muthukrishnan > Change-Id: I88099e2ba8e058fbb519a06d09ebb3ece7c7e27f > --- > v2: > - rebased due to patch conflict. Applied to dpdk-next-crypto Removed change-id while merging.
Re: RFC acceptable handling of VLAs across toolchains
On Wed, Nov 08, 2023 at 08:51:54AM -0800, Stephen Hemminger wrote: > On Tue, 7 Nov 2023 11:32:20 -0800 > Tyler Retzlaff wrote: > > > hi folks, > > > > i'm seeking advice. we have use of VLAs which are now optional in > > standard C. some toolchains provide a conformant implementation and msvc > > does not (and never will). > > > > we seem to have a few options, just curious about what people would > > prefer. > > > > * use alloca > > > > * use dynamically allocated storage > > > > * conditional compiled code where the msvc leg uses one of the previous > > two options > > > > i'll leave it simple for now, i'd like to hear input rather than provide > > a recommendation for now. > > > > thanks! > > As an experiment did a build of current DPDK with -Wvla option. so maybe what i will do here is put a series up that convers to alloca() for libs and enables -Wvla as a part of the review we can discuss case-by-case basis of keeping alloca or converting to regular C arrays? for the items identified below i'll make the conversions as you have suggested in v1 of the series and seek further comment. > > Lots of errors, some have obvious solutions like: > > ../drivers/net/failsafe/failsafe_intr.c: In function > ‘fs_rx_event_proxy_service_install’: > ../drivers/net/failsafe/failsafe_intr.c:142:17: warning: ISO C90 forbids > variable length array ‘service_core_list’ [-Wvla] > 142 | uint32_t service_core_list[num_service_cores]; > | ^~~~ > > This could just be RTE_MAX_LCORES. > > others like rte_metrics should just use malloc() as is used already in > that function. > > ../lib/metrics/rte_metrics_telemetry.c: In function > ‘rte_metrics_tel_update_metrics_ethdev’: > ../lib/metrics/rte_metrics_telemetry.c:140:9: warning: ISO C90 forbids > variable length array ‘xstats_values’ [-Wvla] > 140 | uint64_t xstats_values[num_xstats]; > | ^~~~ > ../lib/metrics/rte_metrics_telemetry.c: In function > ‘rte_metrics_tel_extract_data’: > ../lib/metrics/rte_metrics_telemetry.c:384:9: warning: ISO C90 forbids > variable length array ‘stat_names’ [-Wvla] > 384 | const char *stat_names[num_stat_names]; > | ^ > > Others already have an implicit upper bound. > Example is in rte_cuckoo_hash where some fields us RTE_HASH_LOOKUP_BULK_MAX > and some use VLA. > > [170/2868] Compiling C object lib/librte_hash.a.p/hash_rte_cuckoo_hash.c.o > ../lib/hash/rte_cuckoo_hash.c: In function ‘rte_hash_lookup_bulk_data’: > ../lib/hash/rte_cuckoo_hash.c:2355:9: warning: ISO C90 forbids variable > length array ‘positions’ [-Wvla] > 2355 | int32_t positions[num_keys]; > | ^~~ > ../lib/hash/rte_cuckoo_hash.c: In function > ‘rte_hash_lookup_with_hash_bulk_data’: > ../lib/hash/rte_cuckoo_hash.c:2471:9: warning: ISO C90 forbids variable > length array ‘positions’ [-Wvla] > 2471 | int32_t positions[num_keys]; > | ^~~ > > Would it make sense to have an rte_config.h value for maximum burst size? > Lots of code is using nb_pkts. > > There is also some confusing ones like: > ../lib/mempool/rte_mempool.c: In function ‘mempool_cache_init’: > ../lib/mempool/rte_mempool.c:751:50: warning: ISO C90 forbids array whose > size cannot be evaluated [-Wvla] > 751 | RTE_SIZEOF_FIELD(struct rte_mempool_cache, > objs[0])); > | ^ > ../lib/eal/include/rte_common.h:498:65: note: in definition of macro > ‘RTE_BUILD_BUG_ON’ > 498 | #define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - > 2*!!(condition)])) > | > ^ > ../lib/mempool/rte_mempool.c:751:26: note: in expansion of macro > ‘RTE_SIZEOF_FIELD’ > 751 | RTE_SIZEOF_FIELD(struct rte_mempool_cache, > objs[0]));
RE: [EXT] [PATCH] crypto/mlx5: fix crypto dev leak
> For the case crypto initialize failed, the allocated crypto dev should > be destroyed, otherwise the dev leaked. Current PMD returns directly > instead of releasing the dev. > > This commit fixes the crypto dev leak when initialize failed. > > Fixes: a27f6a2e1f30 ("crypto/mlx5: split AES-XTS") > Signed-off-by: Suanming Mou > Acked-by: Matan Azrad Applied to dpdk-next-crypto Cc: sta...@dpdk.org
[PATCH v4] examples/l2fwd-macsec: add MACsec forwarding application
Added a new application based on l2fwd to demonstrate inline protocol offload MACsec performance using rte_security APIs. Example command: ./dpdk-l2fwd-macsec -a 0002:04:00.0 -a 0002:05:00.0 -c 0x6 -- -p 0x3 \ --mcs-tx-portmask 0x1 --mcs-rx-portmask 0x2 --mcs-port-config \ '(0,02:03:04:05:06:07,01:02:03:04:05:06), \ (1,02:03:04:05:06:17,01:02:03:04:05:16)' Signed-off-by: Akhil Goyal --- Changes in v4: - rebased and fixed format specifier. Changes in v3: - updated MAINTAINERS - fixed checkpatch Changes in v2: - fixed CentOS compilation - fixed documentation and added release notes. MAINTAINERS |5 + doc/guides/rel_notes/release_23_11.rst|5 + doc/guides/sample_app_ug/index.rst|1 + .../sample_app_ug/l2_forward_macsec.rst | 97 + examples/l2fwd-macsec/Makefile| 52 + examples/l2fwd-macsec/main.c | 1570 + examples/l2fwd-macsec/meson.build | 13 + examples/meson.build |1 + 8 files changed, 1744 insertions(+) create mode 100644 doc/guides/sample_app_ug/l2_forward_macsec.rst create mode 100644 examples/l2fwd-macsec/Makefile create mode 100644 examples/l2fwd-macsec/main.c create mode 100644 examples/l2fwd-macsec/meson.build diff --git a/MAINTAINERS b/MAINTAINERS index 787f5683d6..b2c3ca2116 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1885,6 +1885,11 @@ T: git://dpdk.org/next/dpdk-next-eventdev F: examples/l2fwd-event/ F: doc/guides/sample_app_ug/l2_forward_event.rst +L2 forwarding with MACsec example +M: Akhil Goyal +F: doc/guides/sample_app_ug/l2_forward_macsec.rst +F: examples/l2fwd-macsec/ + L3 forwarding example F: examples/l3fwd/ F: doc/guides/sample_app_ug/l3_forward.rst diff --git a/doc/guides/rel_notes/release_23_11.rst b/doc/guides/rel_notes/release_23_11.rst index 0a0f51d1c0..0d8815dd53 100644 --- a/doc/guides/rel_notes/release_23_11.rst +++ b/doc/guides/rel_notes/release_23_11.rst @@ -301,6 +301,11 @@ New Features Application provides a framework so that each use case can be added via a file. Each CLI will further be translated into a graph representing the use case. +* **Added layer 2 MACsec forwarding example application.** + + Added a new example layer 2 forwarding application to benchmark + MACsec encryption/decryption using rte_security based inline sessions. + Removed Items - diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst index 19485556c7..21ee35cc8b 100644 --- a/doc/guides/sample_app_ug/index.rst +++ b/doc/guides/sample_app_ug/index.rst @@ -26,6 +26,7 @@ Sample Applications User Guides l2_forward_real_virtual l2_forward_event l2_forward_cat +l2_forward_macsec l3_forward l3_forward_graph l3_forward_power_man diff --git a/doc/guides/sample_app_ug/l2_forward_macsec.rst b/doc/guides/sample_app_ug/l2_forward_macsec.rst new file mode 100644 index 00..4744c3fa68 --- /dev/null +++ b/doc/guides/sample_app_ug/l2_forward_macsec.rst @@ -0,0 +1,97 @@ +.. SPDX-License-Identifier: BSD-3-Clause +Copyright(C) 2023 Marvell. + +.. _l2_fwd_macsec_app: + +L2 Forwarding MACsec Sample Application +=== + +The L2 Forwarding MACsec application is a simple example of packet processing using +the Data Plane Development Kit (DPDK) which encrypt/decrypt packets based on +rte_security MACsec sessions. + +Overview + + +The L2 Forwarding MACsec application performs L2 forwarding for each packet that is +received on an RX_PORT after encrypting/decrypting the packets based on rte_security +sessions using inline protocol mode. +The destination port is the adjacent port from the enabled portmask, that is, +if the first four ports are enabled (portmask 0xf), +ports 1 and 2 forward into each other, and ports 3 and 4 forward into each other. + +This application can be used to benchmark performance using a traffic-generator. + +Compiling the Application +- + +To compile the sample application see :doc:`compiling`. + +The application is located in the ``l2fwd-macsec`` sub-directory. + +Running the Application +--- + +The application requires a number of command line options: + +.. code-block:: console + +.//examples/dpdk-l2fwd-macsec [EAL options] -- -p PORTMASK + [-q NQ] + --mcs-tx-portmask OUTBOUND_PORTMASK + --mcs-rx-portmask INBOUND_PORTMASK + --mcs-port-config '(port,src_mac,dst_mac)[,(port,src_mac,dst_mac)]' + [--portmap="(port, port)[,(port, port)]"] + [-T STAT_INTERVAL] + +where, + +* ``p PORTMASK``: A hexadecimal bitmask of the ports to configure + +* ``q NQ``: A number of queues (=ports) per lcore (default is 1) + +* ``T ST
Re: [PATCH v4] examples/l2fwd-macsec: add MACsec forwarding application
Hi Akhil, 09/11/2023 22:01, Akhil Goyal: > Added a new application based on l2fwd to demonstrate inline protocol > offload MACsec performance using rte_security APIs. In general we try to avoid adding a new example application. Does this one has been discussed already with the techboard? We need a good justification for an exception. Why not merging it in l2fwd-crypto?
[PATCH v2 0/1] dts: Add the ability to bind ports to drivers
From: Jeremy Spewock Changes in this version address the comments on the last and change what was necessary. Now, we no longer modprobe the driver, but the decision was made to still make driver binding exclusive to the SUT for the time being due to the uncertainty of what binding drivers on the traffic generator will look like in the future when we need to do so. I also decided to leave the os_udp test case in the patch as leaving it does no harm really, all that is required for it to run is binding to the os_driver before it runs and back to the DPDK driver after, and I think it serves as somewhat of a "hello world" for ensuring that your traffic generator is functioning. If it is decided that we no longer want it in the future or want to make it a part of the hello_world suite, another patch will be submitted at a later date. v1: https://mails.dpdk.org/archives/dev/2023-November/281477.html Jeremy Spewock (1): dts: allow configuring MTU of ports dts/framework/remote_session/linux_session.py | 7 +++ dts/framework/remote_session/os_session.py| 9 + 2 files changed, 16 insertions(+) -- 2.42.0
[PATCH v2 1/1] dts: allow configuring MTU of ports
From: Jeremy Spewock Adds methods in both os_session and linux session to allow for setting MTU of port interfaces in an OS agnostic way. Signed-off-by: Jeremy Spewock --- dts/framework/remote_session/linux_session.py | 7 +++ dts/framework/remote_session/os_session.py| 9 + 2 files changed, 16 insertions(+) diff --git a/dts/framework/remote_session/linux_session.py b/dts/framework/remote_session/linux_session.py index a3f1a6bf3b..dab68d41b1 100644 --- a/dts/framework/remote_session/linux_session.py +++ b/dts/framework/remote_session/linux_session.py @@ -196,6 +196,13 @@ def configure_port_ip_address( verify=True, ) +def configure_port_mtu(self, mtu: int, port: Port) -> None: +self.send_command( +f"ip link set dev {port.logical_name} mtu {mtu}", +privileged=True, +verify=True, +) + def configure_ipv4_forwarding(self, enable: bool) -> None: state = 1 if enable else 0 self.send_command(f"sysctl -w net.ipv4.ip_forward={state}", privileged=True) diff --git a/dts/framework/remote_session/os_session.py b/dts/framework/remote_session/os_session.py index 8a709eac1c..c038f78b79 100644 --- a/dts/framework/remote_session/os_session.py +++ b/dts/framework/remote_session/os_session.py @@ -277,6 +277,15 @@ def configure_port_ip_address( Configure (add or delete) an IP address of the input port. """ +@abstractmethod +def configure_port_mtu(self, mtu: int, port: Port) -> None: +"""Configure MTU on a given port. + +Args: +mtu: Desired MTU value. +port: Port to set the MTU on. +""" + @abstractmethod def configure_ipv4_forwarding(self, enable: bool) -> None: """ -- 2.42.0
[PATCH v1 1/7] dts: Add scatter test suite
From: Jeremy Spewock This test suite provides testing the support of scattered packets by Poll Mode Drivers using testpmd. It incorporates 5 different test cases which test the sending and receiving of packets with lengths that are less than the mbuf data buffer size, the same as the mbuf data buffer size, and the mbuf data buffer size plus 1, 4, and 5. The goal of this test suite is to align with the existing dts test plan for scattered packets within DTS. Signed-off-by: Jeremy Spewock --- dts/tests/TestSuite_scatter.py | 85 ++ 1 file changed, 85 insertions(+) create mode 100644 dts/tests/TestSuite_scatter.py diff --git a/dts/tests/TestSuite_scatter.py b/dts/tests/TestSuite_scatter.py new file mode 100644 index 00..0adaad1b2c --- /dev/null +++ b/dts/tests/TestSuite_scatter.py @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2023 University of New Hampshire + +import struct + +from scapy.layers.inet import IP # type: ignore[import] +from scapy.layers.l2 import Ether # type: ignore[import] +from scapy.packet import Raw # type: ignore[import] +from scapy.utils import hexstr # type: ignore[import] + +from framework.remote_session import TestPmdShell +from framework.test_suite import TestSuite + + +class Scatter(TestSuite): +mbsize: int + +def set_up_suite(self) -> None: +self.verify( +len(self._port_links) > 1, +"Must have at least two port links to run scatter", +) +if self._sut_port_egress.os_driver in ["i40e", "ixgbe", "ice"]: +self.mbsize = 2048 +else: +self.mbsize = 1024 + +self.tg_node.main_session.configure_port_mtu(9000, self._tg_port_egress) +self.tg_node.main_session.configure_port_mtu(9000, self._tg_port_ingress) + +def scatter_pktgen_send_packet(self, pktsize: int) -> str: +"""Generate and send packet to the SUT. + +Functional test for scatter packets. + +Args: +pktsize: Size of the packet to generate and send. +""" +packet = Ether() / IP() / Raw() +packet.getlayer(2).load = "" +payload_len = pktsize - len(packet) - 4 +payload = ["58"] * payload_len +# pack the payload +for X_in_hex in payload: +packet.load += struct.pack( +"=B", int("%s%s" % (X_in_hex[0], X_in_hex[1]), 16) +) +received_packets = self.send_packet_and_capture(packet) +self.verify(len(received_packets) > 0, "Did not receive any packets.") +load = hexstr(received_packets[0].getlayer(2), onlyhex=1) + +return load + +def test_scatter_mbuf_2048(self) -> None: +""" +Test: +Start testpmd and run functional test with preset mbsize. +""" +testpmd = self.sut_node.create_interactive_shell( +TestPmdShell, +app_parameters=( +"--mbcache=200 " +f"--mbuf-size={self.mbsize} " +"--max-pkt-len=9000 " +"--port-topology=paired " +"--tx-offloads=0x8000" +), +privileged=True, +) +testpmd.send_command("set fwd mac") +testpmd.send_command("start") +link_is_up = testpmd.wait_link_status_up(0) and testpmd.wait_link_status_up(1) +self.verify(link_is_up, "Links never came up in TestPMD.") + +for offset in [-1, 0, 1, 4, 5]: +recv_payload = self.scatter_pktgen_send_packet(self.mbsize + offset) +self.verify( +("58 " * 8).strip() in recv_payload, +"Received packet had incorrect payload", +) +testpmd.send_command("stop") + +def tear_down_suite(self) -> None: +self.tg_node.main_session.configure_port_mtu(1500, self._tg_port_egress) +self.tg_node.main_session.configure_port_mtu(1500, self._tg_port_ingress) -- 2.42.0
[PATCH v1 0/7] dts: Port scatter suite over
From: Jeremy Spewock This patch ports over the functionality of the scatter testing suite from "old dts." The idea of the suite is the coverage it provides should be parity with the ethdev testing suite in old DTS. Depends-on: series-30228 ("dts: Add the ability to bind ports to drivers") Jeremy Spewock (7): dts: Add scatter test suite dts: add waiting for port up in testpmd dts: add scatter to the yaml schema dts: allow passing parameters into interactive apps dts: add optional packet filtering to scapy sniffer dts: add pci addresses to EAL parameters dts: allow configuring MTU of ports dts/framework/config/conf_yaml_schema.json| 3 +- dts/framework/remote_session/linux_session.py | 7 ++ dts/framework/remote_session/os_session.py| 9 ++ .../remote_session/remote/testpmd_shell.py| 31 ++- dts/framework/test_suite.py | 13 ++- .../capturing_traffic_generator.py| 12 ++- dts/framework/testbed_model/scapy.py | 11 ++- dts/framework/testbed_model/sut_node.py | 20 - dts/framework/testbed_model/tg_node.py| 4 +- dts/tests/TestSuite_scatter.py| 85 +++ 10 files changed, 184 insertions(+), 11 deletions(-) create mode 100644 dts/tests/TestSuite_scatter.py -- 2.42.0
[PATCH v1 2/7] dts: add waiting for port up in testpmd
From: Jeremy Spewock Added a method within the testpmd interactive shell that polls the status of ports and verifies that the link status on a given port is "up." Polling will continue until either the link comes up, or the timeout is reached. Signed-off-by: Jeremy Spewock --- .../remote_session/remote/testpmd_shell.py| 29 +++ 1 file changed, 29 insertions(+) diff --git a/dts/framework/remote_session/remote/testpmd_shell.py b/dts/framework/remote_session/remote/testpmd_shell.py index 1455b5a199..3ea16c7ab3 100644 --- a/dts/framework/remote_session/remote/testpmd_shell.py +++ b/dts/framework/remote_session/remote/testpmd_shell.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2023 University of New Hampshire +import time from pathlib import PurePath from typing import Callable +from framework.settings import SETTINGS + from .interactive_shell import InteractiveShell @@ -47,3 +50,29 @@ def get_devices(self) -> list[TestPmdDevice]: if "device name:" in line.lower(): dev_list.append(TestPmdDevice(line)) return dev_list + +def wait_link_status_up(self, port_id: int, timeout=SETTINGS.timeout) -> bool: +"""Wait until the link status on the given port is "up". + +Arguments: +port_id: Port to check the link status on. +timeout: time to wait for the link to come up. + +Returns: +If the link came up in time or not. +""" +time_to_stop = time.time() + timeout +while time.time() < time_to_stop: +port_info = self.send_command(f"show port info {port_id}") +if "Link status: up" in port_info: +break +time.sleep(0.5) +else: +self._logger.error( +f"The link for port {port_id} did not come up in the given timeout." +) +return "Link status: up" in port_info + +def close(self) -> None: +self.send_command("exit", "") +return super().close() -- 2.42.0
[PATCH v1 3/7] dts: add scatter to the yaml schema
From: Jeremy Spewock Allow for scatter to be specificed in the configuration file. Signed-off-by: Jeremy Spewock --- dts/framework/config/conf_yaml_schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dts/framework/config/conf_yaml_schema.json b/dts/framework/config/conf_yaml_schema.json index 84e45fe3c2..97ee32f47a 100644 --- a/dts/framework/config/conf_yaml_schema.json +++ b/dts/framework/config/conf_yaml_schema.json @@ -186,7 +186,8 @@ "type": "string", "enum": [ "hello_world", -"os_udp" +"os_udp", +"scatter" ] }, "test_target": { -- 2.42.0
[PATCH v1 4/7] dts: allow passing parameters into interactive apps
From: Jeremy Spewock Modified interactive applications to allow for the ability to pass parameters into the app on start up. Also modified the way EAL parameters are handled so that the trailing "--" separator is added be default after all EAL parameters. Signed-off-by: Jeremy Spewock --- dts/framework/remote_session/remote/testpmd_shell.py | 2 +- dts/framework/testbed_model/sut_node.py | 11 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dts/framework/remote_session/remote/testpmd_shell.py b/dts/framework/remote_session/remote/testpmd_shell.py index 3ea16c7ab3..3f6a86aa35 100644 --- a/dts/framework/remote_session/remote/testpmd_shell.py +++ b/dts/framework/remote_session/remote/testpmd_shell.py @@ -32,7 +32,7 @@ def _start_application( self, get_privileged_command: Callable[[str], str] | None ) -> None: """See "_start_application" in InteractiveShell.""" -self._app_args += " -- -i" +self._app_args += " -i" super()._start_application(get_privileged_command) def get_devices(self) -> list[TestPmdDevice]: diff --git a/dts/framework/testbed_model/sut_node.py b/dts/framework/testbed_model/sut_node.py index 4161d3a4d5..bcac939e72 100644 --- a/dts/framework/testbed_model/sut_node.py +++ b/dts/framework/testbed_model/sut_node.py @@ -377,7 +377,8 @@ def create_interactive_shell( shell_cls: Type[InteractiveShellType], timeout: float = SETTINGS.timeout, privileged: bool = False, -eal_parameters: EalParameters | str | None = None, +eal_parameters: EalParameters | str = "", +app_parameters: str = "", ) -> InteractiveShellType: """Factory method for creating a handler for an interactive session. @@ -392,12 +393,14 @@ def create_interactive_shell( eal_parameters: List of EAL parameters to use to launch the app. If this isn't provided or an empty string is passed, it will default to calling create_eal_parameters(). +app_parameters: Additional arguments to pass into the application on the +command-line. Returns: Instance of the desired interactive application. """ -if not eal_parameters: +if not eal_parameters and shell_cls.dpdk_app: eal_parameters = self.create_eal_parameters() - +eal_parameters = f"{eal_parameters} --" # We need to append the build directory for DPDK apps if shell_cls.dpdk_app: shell_cls.path = self.main_session.join_remote_path( @@ -405,7 +408,7 @@ def create_interactive_shell( ) return super().create_interactive_shell( -shell_cls, timeout, privileged, str(eal_parameters) +shell_cls, timeout, privileged, f"{eal_parameters} {app_parameters}" ) def bind_ports_to_driver(self, for_dpdk: bool = True) -> None: -- 2.42.0
[PATCH v1 5/7] dts: add optional packet filtering to scapy sniffer
From: Jeremy Spewock Added the options to filter out LLDP and ARP packets when sniffing for packets with scapy. This was done using BPF filters to ensure that the noise these packets provide does not interfere with test cases. Signed-off-by: Jeremy Spewock --- dts/framework/test_suite.py | 13 +++-- .../testbed_model/capturing_traffic_generator.py| 12 +++- dts/framework/testbed_model/scapy.py| 11 ++- dts/framework/testbed_model/tg_node.py | 4 +++- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/dts/framework/test_suite.py b/dts/framework/test_suite.py index 3b890c0451..3222f172f3 100644 --- a/dts/framework/test_suite.py +++ b/dts/framework/test_suite.py @@ -152,7 +152,11 @@ def _configure_ipv4_forwarding(self, enable: bool) -> None: self.sut_node.configure_ipv4_forwarding(enable) def send_packet_and_capture( -self, packet: Packet, duration: float = 1 +self, +packet: Packet, +duration: float = 1, +no_lldp: bool = True, +no_arp: bool = True, ) -> list[Packet]: """ Send a packet through the appropriate interface and @@ -162,7 +166,12 @@ def send_packet_and_capture( """ packet = self._adjust_addresses(packet) return self.tg_node.send_packet_and_capture( -packet, self._tg_port_egress, self._tg_port_ingress, duration +packet, +self._tg_port_egress, +self._tg_port_ingress, +duration, +no_lldp, +no_arp, ) def get_expected_packet(self, packet: Packet) -> Packet: diff --git a/dts/framework/testbed_model/capturing_traffic_generator.py b/dts/framework/testbed_model/capturing_traffic_generator.py index ab98987f8e..0a0d0f7d0d 100644 --- a/dts/framework/testbed_model/capturing_traffic_generator.py +++ b/dts/framework/testbed_model/capturing_traffic_generator.py @@ -52,6 +52,8 @@ def send_packet_and_capture( send_port: Port, receive_port: Port, duration: float, +no_lldp: bool, +no_arp: bool, capture_name: str = _get_default_capture_name(), ) -> list[Packet]: """Send a packet, return received traffic. @@ -71,7 +73,7 @@ def send_packet_and_capture( A list of received packets. May be empty if no packets are captured. """ return self.send_packets_and_capture( -[packet], send_port, receive_port, duration, capture_name +[packet], send_port, receive_port, duration, no_lldp, no_arp, capture_name ) def send_packets_and_capture( @@ -80,6 +82,8 @@ def send_packets_and_capture( send_port: Port, receive_port: Port, duration: float, +no_lldp: bool, +no_arp: bool, capture_name: str = _get_default_capture_name(), ) -> list[Packet]: """Send packets, return received traffic. @@ -93,6 +97,8 @@ def send_packets_and_capture( send_port: The egress port on the TG node. receive_port: The ingress port in the TG node. duration: Capture traffic for this amount of time after sending the packets. +no_lldp: Option to disable capturing LLDP packets +no_arp: Option to disable capturing ARP packets capture_name: The name of the .pcap file where to store the capture. Returns: @@ -108,6 +114,8 @@ def send_packets_and_capture( send_port, receive_port, duration, +no_lldp, +no_arp, ) self._logger.debug( @@ -123,6 +131,8 @@ def _send_packets_and_capture( send_port: Port, receive_port: Port, duration: float, +no_lldp: bool, +no_arp: bool, ) -> list[Packet]: """ The extended classes must implement this method which diff --git a/dts/framework/testbed_model/scapy.py b/dts/framework/testbed_model/scapy.py index af0d4dbb25..58f01af21a 100644 --- a/dts/framework/testbed_model/scapy.py +++ b/dts/framework/testbed_model/scapy.py @@ -69,6 +69,7 @@ def scapy_send_packets_and_capture( send_iface: str, recv_iface: str, duration: float, +sniff_filter: str, ) -> list[bytes]: """RPC function to send and capture packets. @@ -90,6 +91,7 @@ def scapy_send_packets_and_capture( iface=recv_iface, store=True, started_callback=lambda *args: scapy.all.sendp(scapy_packets, iface=send_iface), +filter=sniff_filter, ) sniffer.start() time.sleep(duration) @@ -264,10 +266,16 @@ def _send_packets_and_capture( send_port: Port, receive_port: Port, duration: float, +no_lldp: bool, +no_arp: bool, capture_name: str = _get_default_capture_name(), ) -> list[Packet]: binary_packets = [packet.build() for pac
[PATCH v1 6/7] dts: add pci addresses to EAL parameters
From: Jeremy Spewock Added allow list to the EAL parameters created in DTS to ensure that only the relevent PCI devices are considered when launching DPDK applications. Signed-off-by: Jeremy Spewock --- dts/framework/testbed_model/sut_node.py | 9 + 1 file changed, 9 insertions(+) diff --git a/dts/framework/testbed_model/sut_node.py b/dts/framework/testbed_model/sut_node.py index bcac939e72..f9c7bd9bf3 100644 --- a/dts/framework/testbed_model/sut_node.py +++ b/dts/framework/testbed_model/sut_node.py @@ -20,6 +20,7 @@ from framework.utils import MesonArgs from .hw import LogicalCoreCount, LogicalCoreList, VirtualDevice +from .hw.port import Port from .node import Node @@ -31,6 +32,7 @@ def __init__( prefix: str, no_pci: bool, vdevs: list[VirtualDevice], +ports: list[Port], other_eal_param: str, ): """ @@ -56,6 +58,7 @@ def __init__( self._prefix = f"--file-prefix={prefix}" self._no_pci = "--no-pci" if no_pci else "" self._vdevs = " ".join(f"--vdev {vdev}" for vdev in vdevs) +self._ports = " ".join(f"-a {port.pci}" for port in ports) self._other_eal_param = other_eal_param def __str__(self) -> str: @@ -65,6 +68,7 @@ def __str__(self) -> str: f"{self._prefix} " f"{self._no_pci} " f"{self._vdevs} " +f"{self._ports} " f"{self._other_eal_param}" ) @@ -308,6 +312,7 @@ def create_eal_parameters( append_prefix_timestamp: bool = True, no_pci: bool = False, vdevs: list[VirtualDevice] = None, +ports: list[Port] | None = None, other_eal_param: str = "", ) -> "EalParameters": """ @@ -350,12 +355,16 @@ def create_eal_parameters( if vdevs is None: vdevs = [] +if ports is None: +ports = self.ports + return EalParameters( lcore_list=lcore_list, memory_channels=self.config.memory_channels, prefix=prefix, no_pci=no_pci, vdevs=vdevs, +ports=ports, other_eal_param=other_eal_param, ) -- 2.42.0
[PATCH v1 7/7] dts: allow configuring MTU of ports
From: Jeremy Spewock Adds methods in both os_session and linux session to allow for setting MTU of port interfaces in an OS agnostic way. Signed-off-by: Jeremy Spewock --- dts/framework/remote_session/linux_session.py | 7 +++ dts/framework/remote_session/os_session.py| 9 + 2 files changed, 16 insertions(+) diff --git a/dts/framework/remote_session/linux_session.py b/dts/framework/remote_session/linux_session.py index a3f1a6bf3b..dab68d41b1 100644 --- a/dts/framework/remote_session/linux_session.py +++ b/dts/framework/remote_session/linux_session.py @@ -196,6 +196,13 @@ def configure_port_ip_address( verify=True, ) +def configure_port_mtu(self, mtu: int, port: Port) -> None: +self.send_command( +f"ip link set dev {port.logical_name} mtu {mtu}", +privileged=True, +verify=True, +) + def configure_ipv4_forwarding(self, enable: bool) -> None: state = 1 if enable else 0 self.send_command(f"sysctl -w net.ipv4.ip_forward={state}", privileged=True) diff --git a/dts/framework/remote_session/os_session.py b/dts/framework/remote_session/os_session.py index 8a709eac1c..c038f78b79 100644 --- a/dts/framework/remote_session/os_session.py +++ b/dts/framework/remote_session/os_session.py @@ -277,6 +277,15 @@ def configure_port_ip_address( Configure (add or delete) an IP address of the input port. """ +@abstractmethod +def configure_port_mtu(self, mtu: int, port: Port) -> None: +"""Configure MTU on a given port. + +Args: +mtu: Desired MTU value. +port: Port to set the MTU on. +""" + @abstractmethod def configure_ipv4_forwarding(self, enable: bool) -> None: """ -- 2.42.0