Re: [RESEND v7 1/3] ring: fix unmatched type definition and usage
On 2024/2/19 2:11, Thomas Monjalon wrote:
> 09/11/2023 11:20, Jie Hai:
>> Field 'flags' of struct rte_ring is defined as int type. However,
>> it is used as unsigned int. To ensure consistency, change the type
>> of flags to unsigned int. Since these two types have the same byte
>> size, this change is not an ABI change.
>>
>> Fixes: af75078fece3 ("first public release")
>>
>> Signed-off-by: Jie Hai
>> Acked-by: Konstantin Ananyev
>> Acked-by: Chengwen Feng
>> Acked-by: Morten Brørup
>> ---
>>  lib/ring/rte_ring_core.h | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
>> index b7708730658a..14dac6495d83 100644
>> --- a/lib/ring/rte_ring_core.h
>> +++ b/lib/ring/rte_ring_core.h
>> @@ -119,7 +119,7 @@ struct rte_ring_hts_headtail {
>>  struct rte_ring {
>>  	char name[RTE_RING_NAMESIZE] __rte_cache_aligned; /**< Name of the ring. */
>> -	int flags; /**< Flags supplied at creation. */
>> +	uint32_t flags; /**< Flags supplied at creation. */
>
> This triggers a warning in our ABI checker:
>
>   in pointed to type 'struct rte_ring' at rte_ring_core.h:119:1:
>     type size hasn't changed
>     1 data member change:
>       type of 'int flags' changed:
>         entity changed from 'int' to compatible type 'typedef uint32_t' at stdint-uintn.h:26:1
>         type name changed from 'int' to 'unsigned int'
>         type size hasn't changed
>
> I guess we were supposed to merge this in 23.11, sorry about this.
> How can we proceed?

How about we drop this amendment (patch 1/3) for now?
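An aside for readers of the digest: the commit message's claim that the change is binary-compatible (only the recorded type name changes, which is what the checker flags) can be captured in a compile-time check. A minimal sketch, not part of the patch, using only standard C11:

#include <assert.h>
#include <stdint.h>

/* 'int' and 'uint32_t' have identical size and alignment on every
 * platform DPDK supports, so struct rte_ring keeps its exact layout;
 * only the type name recorded in debug info changes, which is what
 * the ABI checker reports.
 */
static_assert(sizeof(int) == sizeof(uint32_t),
	"rte_ring flags: storage size must not change");
static_assert(_Alignof(int) == _Alignof(uint32_t),
	"rte_ring flags: alignment must not change");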
[PATCH v8 2/2] ring: add telemetry cmd for ring info
This patch supports dump of ring information by its name. An example using this command is shown below:

--> /ring/info,MP_mb_pool_0
{
  "/ring/info": {
    "name": "MP_mb_pool_0",
    "socket": 0,
    "flags": 0,
    "producer_type": "MP",
    "consumer_type": "MC",
    "size": 262144,
    "mask": "0x3",
    "capacity": 262143,
    "used_count": 153197,
    "mz_name": "RG_MP_mb_pool_0",
    "mz_len": 2097536,
    "mz_hugepage_sz": 1073741824,
    "mz_socket_id": 0,
    "mz_flags": "0x0"
  }
}

Signed-off-by: Jie Hai
Reviewed-by: Honnappa Nagarahalli
Acked-by: Konstantin Ananyev
Acked-by: Huisong Li
Acked-by: Chengwen Feng
Acked-by: Morten Brørup
---
 lib/ring/rte_ring.c | 95 +
 1 file changed, 95 insertions(+)

diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c
index 75f53c723186..e6c746cce1f1 100644
--- a/lib/ring/rte_ring.c
+++ b/lib/ring/rte_ring.c
@@ -458,8 +458,103 @@ ring_handle_list(const char *cmd __rte_unused,
 	return 0;
 }
 
+static const char *
+ring_prod_sync_type_to_name(struct rte_ring *r)
+{
+	switch (r->prod.sync_type) {
+	case RTE_RING_SYNC_MT:
+		return "MP";
+	case RTE_RING_SYNC_ST:
+		return "SP";
+	case RTE_RING_SYNC_MT_RTS:
+		return "MP_RTS";
+	case RTE_RING_SYNC_MT_HTS:
+		return "MP_HTS";
+	default:
+		return "Unknown";
+	}
+}
+
+static const char *
+ring_cons_sync_type_to_name(struct rte_ring *r)
+{
+	switch (r->cons.sync_type) {
+	case RTE_RING_SYNC_MT:
+		return "MC";
+	case RTE_RING_SYNC_ST:
+		return "SC";
+	case RTE_RING_SYNC_MT_RTS:
+		return "MC_RTS";
+	case RTE_RING_SYNC_MT_HTS:
+		return "MC_HTS";
+	default:
+		return "Unknown";
+	}
+}
+
+struct ring_info_cb_arg {
+	char *ring_name;
+	struct rte_tel_data *d;
+};
+
+static void
+ring_info_cb(struct rte_ring *r, void *arg)
+{
+	struct ring_info_cb_arg *ring_arg = (struct ring_info_cb_arg *)arg;
+	struct rte_tel_data *d = ring_arg->d;
+	const struct rte_memzone *mz;
+
+	if (strncmp(r->name, ring_arg->ring_name, RTE_RING_NAMESIZE))
+		return;
+
+	rte_tel_data_add_dict_string(d, "name", r->name);
+	rte_tel_data_add_dict_int(d, "socket", r->memzone->socket_id);
+	rte_tel_data_add_dict_int(d, "flags", r->flags);
+	rte_tel_data_add_dict_string(d, "producer_type",
+		ring_prod_sync_type_to_name(r));
+	rte_tel_data_add_dict_string(d, "consumer_type",
+		ring_cons_sync_type_to_name(r));
+	rte_tel_data_add_dict_uint(d, "size", r->size);
+	rte_tel_data_add_dict_uint_hex(d, "mask", r->mask, 0);
+	rte_tel_data_add_dict_uint(d, "capacity", r->capacity);
+	rte_tel_data_add_dict_uint(d, "used_count", rte_ring_count(r));
+
+	mz = r->memzone;
+	if (mz == NULL)
+		return;
+	rte_tel_data_add_dict_string(d, "mz_name", mz->name);
+	rte_tel_data_add_dict_uint(d, "mz_len", mz->len);
+	rte_tel_data_add_dict_uint(d, "mz_hugepage_sz", mz->hugepage_sz);
+	rte_tel_data_add_dict_int(d, "mz_socket_id", mz->socket_id);
+	rte_tel_data_add_dict_uint_hex(d, "mz_flags", mz->flags, 0);
+}
+
+static int
+ring_handle_info(const char *cmd __rte_unused, const char *params,
+	struct rte_tel_data *d)
+{
+	char name[RTE_RING_NAMESIZE] = {0};
+	struct ring_info_cb_arg ring_arg;
+
+	if (params == NULL || strlen(params) == 0 ||
+	    strlen(params) >= RTE_RING_NAMESIZE)
+		return -EINVAL;
+
+	rte_strlcpy(name, params, RTE_RING_NAMESIZE);
+
+	ring_arg.ring_name = name;
+	ring_arg.d = d;
+
+	rte_tel_data_start_dict(d);
+	ring_walk(ring_info_cb, &ring_arg);
+
+	return 0;
+}
+
 RTE_INIT(ring_init_telemetry)
 {
 	rte_telemetry_register_cmd("/ring/list", ring_handle_list,
 		"Returns list of available rings. Takes no parameters");
+	rte_telemetry_register_cmd("/ring/info", ring_handle_info,
+		"Returns ring info. Parameters: ring_name.");
 }
-- 
2.30.0
[PATCH v8 1/2] ring: add telemetry cmd to list rings
Add a telemetry command to list the rings used in the system. An example using this command is shown below:

--> /ring/list
{
  "/ring/list": [
    "HT_:7d:00.2",
    "MP_mb_pool_0"
  ]
}

Signed-off-by: Jie Hai
Acked-by: Konstantin Ananyev
Reviewed-by: Honnappa Nagarahalli
Acked-by: Huisong Li
Acked-by: Chengwen Feng
Acked-by: Morten Brørup
---
 lib/ring/meson.build |  1 +
 lib/ring/rte_ring.c  | 40
 2 files changed, 41 insertions(+)

diff --git a/lib/ring/meson.build b/lib/ring/meson.build
index c20685c689ac..7fca958ed7fa 100644
--- a/lib/ring/meson.build
+++ b/lib/ring/meson.build
@@ -18,3 +18,4 @@ indirect_headers += files (
         'rte_ring_rts.h',
         'rte_ring_rts_elem_pvt.h',
 )
+deps += ['telemetry']

diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c
index c59f62626383..75f53c723186 100644
--- a/lib/ring/rte_ring.c
+++ b/lib/ring/rte_ring.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 #include "rte_ring.h"
 #include "rte_ring_elem.h"
@@ -423,3 +424,42 @@ rte_ring_lookup(const char *name)
 
 	return r;
 }
+
+static void
+ring_walk(void (*func)(struct rte_ring *, void *), void *arg)
+{
+	struct rte_ring_list *ring_list;
+	struct rte_tailq_entry *tailq_entry;
+
+	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+	rte_mcfg_tailq_read_lock();
+
+	TAILQ_FOREACH(tailq_entry, ring_list, next) {
+		(*func)((struct rte_ring *) tailq_entry->data, arg);
+	}
+
+	rte_mcfg_tailq_read_unlock();
+}
+
+static void
+ring_list_cb(struct rte_ring *r, void *arg)
+{
+	struct rte_tel_data *d = (struct rte_tel_data *)arg;
+
+	rte_tel_data_add_array_string(d, r->name);
+}
+
+static int
+ring_handle_list(const char *cmd __rte_unused,
+	const char *params __rte_unused, struct rte_tel_data *d)
+{
+	rte_tel_data_start_array(d, RTE_TEL_STRING_VAL);
+	ring_walk(ring_list_cb, d);
+	return 0;
+}
+
+RTE_INIT(ring_init_telemetry)
+{
+	rte_telemetry_register_cmd("/ring/list", ring_handle_list,
+		"Returns list of available rings. Takes no parameters");
+}
-- 
2.30.0
[PATCH v8 0/2] add telemetry cmds for ring
This patch set supports telemetry cmds to list rings and dump information of a ring by its name.

v1->v2:
1. Add space after "switch".
2. Fix wrong strlen parameter.

v2->v3:
1. Remove prefix "rte_" for static function.
2. Add Acked-by Konstantin Ananyev for PATCH 1.
3. Introduce functions to return strings instead of copying strings.
4. Check pointer to memzone of ring.
5. Remove redundant variable.
6. Hold lock when accessing ring data.

v3->v4:
1. Update changelog according to reviews of Honnappa Nagarahalli.
2. Add Reviewed-by Honnappa Nagarahalli.
3. Correct grammar in help information.
4. Correct spelling warning on "te" reported by checkpatch.pl.
5. Use ring_walk() to query ring info instead of rte_ring_lookup().
6. Fix the type definition of the flags field of rte_ring so it matches its usage.
7. Use rte_tel_data_add_dict_uint_hex instead of rte_tel_data_add_dict_u64 for mask and flags.

v4->v5:
1. Add Acked-by Konstantin Ananyev and Chengwen Feng.
2. Add ABI change explanation to the commit message of patch 1/3.

v5->v6:
1. Add Acked-by Morten Brørup.
2. Fix incorrect reference of commit.

v6->v7:
1. Remove prod/consumer head/tail info.

v7->v8:
1. Drop patch 1/3 and related code.

Jie Hai (2):
  ring: add telemetry cmd to list rings
  ring: add telemetry cmd for ring info

 lib/ring/meson.build |   1 +
 lib/ring/rte_ring.c  | 135 +++
 2 files changed, 136 insertions(+)
-- 
2.30.0
[PATCH v7 0/2] fix parsing of VLAN metadata
The previous netvsc code incorrectly parsed the VLAN ID and priority. If the 16 bits of VLAN ID and priority/CFI on the wire were 0123456789ABCDEF, the code parsed them as 456789ABCDEF3012. This patch fixes the netvsc parsing code and adds common macros for extracting and setting parts of the VLAN tag.

Alan Elder (2):
  lib/net: fix parsing of VLAN metadata
  net/netvsc: fix parsing of VLAN metadata

---
v7:
* Split patches for lib and driver
v6:
* Line length can be 100 - un-split lines
v5:
* Move the VLAN parsing macros to rte_ether.h
v4:
* Make consistent with FreeBSD code
---

 .mailmap                     |  1 +
 drivers/net/netvsc/hn_rxtx.c |  8 ++--
 lib/net/rte_ether.h          | 14 ++
 3 files changed, 21 insertions(+), 2 deletions(-)
-- 
2.25.1
[PATCH v7 1/2] lib/net: fix parsing of VLAN metadata
Add common macros for extracting parts of the VLAN tag.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: sthem...@microsoft.com
Cc: sta...@dpdk.org

Signed-off-by: Alan Elder
---
v7:
* Split patches for lib and driver
v6:
* Line length can be 100 - un-split lines
v5:
* Move the VLAN parsing macros to rte_ether.h
v4:
* Make consistent with FreeBSD code
---
 .mailmap            |  1 +
 lib/net/rte_ether.h | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/.mailmap b/.mailmap
index de339562f4..08fce0c472 100644
--- a/.mailmap
+++ b/.mailmap
@@ -33,6 +33,7 @@ Alain Leon
 Alan Brady
 Alan Carew
 Alan Dewar
+Alan Elder
 Alan Liu
 Alan Winkowski
 Alejandro Lucero

diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index ce073ea818..75285bdd12 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -46,6 +46,20 @@ extern "C" {
 
 #define RTE_ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */
 
+/* VLAN header fields */
+#define RTE_VLAN_DEI_SHIFT	12
+#define RTE_VLAN_PRI_SHIFT	13
+#define RTE_VLAN_PRI_MASK	0xe000 /* Priority Code Point */
+#define RTE_VLAN_DEI_MASK	0x1000 /* Drop Eligible Indicator */
+#define RTE_VLAN_ID_MASK	0x0fff /* VLAN Identifier */
+
+#define RTE_VLAN_TCI_ID(vlan_tci)	((vlan_tci) & RTE_VLAN_ID_MASK)
+#define RTE_VLAN_TCI_PRI(vlan_tci)	(((vlan_tci) & RTE_VLAN_PRI_MASK) >> RTE_VLAN_PRI_SHIFT)
+#define RTE_VLAN_TCI_DEI(vlan_tci)	(((vlan_tci) & RTE_VLAN_DEI_MASK) >> RTE_VLAN_DEI_SHIFT)
+#define RTE_VLAN_TCI_MAKE(id, pri, dei)	((id) | \
+					 ((pri) << RTE_VLAN_PRI_SHIFT) | \
+					 ((dei) << RTE_VLAN_DEI_SHIFT))
+
 /**
  * Ethernet address:
  * A universally administered address is uniquely assigned to a device by its
-- 
2.25.1
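To illustrate how the new macros compose, a small sketch (the TCI value below is made up for illustration: PCP 5, DEI 0, VLAN ID 123):

#include <stdint.h>
#include <rte_ether.h>

uint16_t tci = 0xa07b;                 /* example TCI: 1010 0000 0111 1011 */
uint16_t id  = RTE_VLAN_TCI_ID(tci);   /* 123 (0x07b) */
uint16_t pri = RTE_VLAN_TCI_PRI(tci);  /* 5 */
uint16_t dei = RTE_VLAN_TCI_DEI(tci);  /* 0 */

/* RTE_VLAN_TCI_MAKE() round-trips the parts back to the original TCI. */
uint16_t rebuilt = RTE_VLAN_TCI_MAKE(id, pri, dei); /* == 0xa07b */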
[PATCH v7 2/2] net/netvsc: fix parsing of VLAN metadata
The previous code incorrectly parsed the VLAN ID and priority. If the 16 bits of VLAN ID and priority/CFI on the wire were 0123456789ABCDEF, the code parsed them as 456789ABCDEF3012. There were macros defined to handle this conversion but they were not used.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: sthem...@microsoft.com
Cc: sta...@dpdk.org

Signed-off-by: Alan Elder
---
v7:
* Split into two patches, one for lib and one for driver
v6:
* Line length can be 100 - un-split lines
v5:
* Move the VLAN parsing macros to rte_ether.h
v4:
* Make consistent with FreeBSD code
---
 drivers/net/netvsc/hn_rxtx.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index e4f5015aa3..9bf1ec5509 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -612,7 +612,9 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
 			RTE_PTYPE_L4_MASK);
 
 	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
-		m->vlan_tci = info->vlan_info;
+		m->vlan_tci = RTE_VLAN_TCI_MAKE(NDIS_VLAN_INFO_ID(info->vlan_info),
+						NDIS_VLAN_INFO_PRI(info->vlan_info),
+						NDIS_VLAN_INFO_CFI(info->vlan_info));
 		m->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN;
 
 		/* NDIS always strips tag, put it back if necessary */
@@ -1332,7 +1334,9 @@ static void hn_encap(struct rndis_packet_msg *pkt,
 	if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
 		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
 						  NDIS_PKTINFO_TYPE_VLAN);
-		*pi_data = m->vlan_tci;
+		*pi_data = NDIS_VLAN_INFO_MAKE(RTE_VLAN_TCI_ID(m->vlan_tci),
+					       RTE_VLAN_TCI_PRI(m->vlan_tci),
+					       RTE_VLAN_TCI_DEI(m->vlan_tci));
 	}
 
 	if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
-- 
2.25.1
Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.
On 2/19/2024 2:44 AM, Rushil Gupta wrote:
> This was causing failure for testpmd runs (for queues >=15)
> presumably due to flooding of logs due to descriptor ring being
> overwritten.
>
> Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Rushil Gupta
> Reviewed-by: Joshua Washington
> ---
>  drivers/net/gve/gve_tx_dqo.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c
> index 1a8eb96ea9..30a1455b20 100644
> --- a/drivers/net/gve/gve_tx_dqo.c
> +++ b/drivers/net/gve/gve_tx_dqo.c
> @@ -116,7 +116,7 @@ gve_tx_burst_dqo(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  		first_sw_id = sw_id;
>  		do {
>  			if (sw_ring[sw_id] != NULL)
> -				PMD_DRV_LOG(ERR, "Overwriting an entry in sw_ring");
> +				PMD_DRV_LOG(DEBUG, "Overwriting an entry in sw_ring");
>
>  			txd = &txr[tx_id];
>  			sw_ring[sw_id] = tx_pkt;

Hi Rushil,

I will continue with this patch, BUT logging in the datapath has potential problems like this one, and it may have a performance impact even when the log is not printed, because of the additional check in the log function.

For the datapath, you can prefer:
- Add log macros controlled by the RTE_ETHDEV_DEBUG_RX & RTE_ETHDEV_DEBUG_TX flags
- Or use the RTE_LOG_DP() macro, which is compiled out if the default log level is less than the level the log uses

Also, another perspective on the logs: when the end-user observes this bloat of logs, what can she do? Can she change some driver arguments or environment variables to fix the issue? If not, what is the point of the log? If this is a condition that can occur dynamically based on received traffic and the user has no control over it, maybe the driver should handle the error without a log?

And if this is a log for the driver developer, perhaps it should be an assert or similar that is disabled in the release build?
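As a rough sketch of the two options suggested above (illustrative only: GVE_DRIVER is a placeholder logtype, and a real change would reuse the driver's own logging setup):

/* Option 1 (sketch): the statement only exists in debug-enabled builds. */
#ifdef RTE_ETHDEV_DEBUG_TX
	if (sw_ring[sw_id] != NULL)
		PMD_DRV_LOG(DEBUG, "Overwriting an entry in sw_ring");
#endif

/* Option 2 (sketch): RTE_LOG_DP() compiles to nothing when the message
 * severity is lower than RTE_LOG_DP_LEVEL (RTE_LOG_INFO by default),
 * so a DEBUG message adds no datapath cost in a default release build.
 */
if (sw_ring[sw_id] != NULL)
	RTE_LOG_DP(DEBUG, GVE_DRIVER, "Overwriting an entry in sw_ring\n");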
RE: [EXTERNAL] Re: [PATCH v6] net/netvsc: fix parsing of VLAN metadata
On 2/16/2024 11:40 AM, Ferruh Yigit:
> I missed v6 but put some comments on v5; briefly, can you please split the
> patch for lib/net and driver?

I've tried to split the patch - hopefully I got the formatting right - please let me know if not (it's my first time submitting a patch and I don't have all the tooling set up).
[RFC v2 0/5] Lcore variables
This RFC presents a new API for static per-lcore id data allocation.

Please refer to the API documentation for both a rationale for this new API and a comparison to the available alternatives.

The adoption of this API would affect many different DPDK modules, but the author updated only a few, mostly to serve as examples in this RFC and to iron out some, but surely not all, wrinkles in the API.

The question of how to best allocate static per-lcore memory has come up several times on the dev mailing list, for example in the thread on the "random: use per lcore state" RFC by Stephen Hemminger.

Lcore variables are surely not the answer to all your per-lcore-data needs, since they only allow for more-or-less static allocation. In the author's opinion, they do however provide a reasonably simple, clean and seemingly very performant solution to a real problem.

One thing that is unclear to the author is how this API relates to a potential future per-lcore dynamic allocator (e.g., a per-lcore heap).

Contrary to what the version.map edit suggests, this RFC is not meant as a proposal for DPDK 24.03.

Mattias Rönnblom (5):
  eal: add static per-lcore memory allocation facility
  eal: add lcore variable test suite
  random: keep PRNG state in lcore variable
  power: keep per-lcore state in lcore variable
  service: keep per-lcore state in lcore variable

 app/test/meson.build                  |   1 +
 app/test/test_lcore_var.c             | 408 ++
 config/rte_config.h                   |   1 +
 doc/api/doxy-api-index.md             |   1 +
 lib/eal/common/eal_common_lcore_var.c |  82 ++
 lib/eal/common/meson.build            |   1 +
 lib/eal/common/rte_random.c           |  30 +-
 lib/eal/common/rte_service.c          | 119
 lib/eal/include/meson.build           |   1 +
 lib/eal/include/rte_lcore_var.h       | 374 +++
 lib/eal/version.map                   |   4 +
 lib/power/rte_power_pmd_mgmt.c        |  27 +-
 12 files changed, 973 insertions(+), 76 deletions(-)
 create mode 100644 app/test/test_lcore_var.c
 create mode 100644 lib/eal/common/eal_common_lcore_var.c
 create mode 100644 lib/eal/include/rte_lcore_var.h
-- 
2.34.1
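Before diving into the patches, a minimal usage sketch, pieced together from the macros exercised in the patches that follow (the module and field names are made up):

#include <stdint.h>
#include <rte_lcore_var.h>

struct my_module_state {
	uint64_t pkts;
};

/* One value per current and future lcore id; the handle is an offset
 * into the per-lcore buffer, not itself a pointer.
 */
static RTE_LCORE_VAR_HANDLE(struct my_module_state, state);

RTE_LCORE_VAR_INIT(state); /* allocate the values at program init */

static void
account_pkt(void)
{
	/* address of this lcore's own value */
	struct my_module_state *s = RTE_LCORE_VAR_PTR(state);

	s->pkts++;
}

static uint64_t
total_pkts(void)
{
	uint64_t sum = 0;
	struct my_module_state *s;

	/* walk the values of all lcore ids */
	RTE_LCORE_VAR_FOREACH_VALUE(s, state)
		sum += s->pkts;

	return sum;
}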
[RFC v2 3/5] random: keep PRNG state in lcore variable
Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the same state in a more cache-friendly lcore variable.

Signed-off-by: Mattias Rönnblom
---
 lib/eal/common/rte_random.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 7709b8f2c6..af9fffd81b 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 
 struct rte_rand_state {
@@ -19,14 +20,12 @@ struct rte_rand_state {
 	uint64_t z3;
 	uint64_t z4;
 	uint64_t z5;
-	RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
 
-/* One instance each for every lcore id-equipped thread, and one
- * additional instance to be shared by all others threads (i.e., all
- * unregistered non-EAL threads).
- */
-static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
+RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
+
+/* instance to be shared by all unregistered non-EAL threads */
+static struct rte_rand_state unregistered_rand_state __rte_cache_aligned;
 
 static uint32_t
 __rte_rand_lcg32(uint32_t *seed)
@@ -85,8 +84,14 @@ rte_srand(uint64_t seed)
 	unsigned int lcore_id;
 
 	/* add lcore_id to seed to avoid having the same sequence */
-	for (lcore_id = 0; lcore_id < RTE_DIM(rand_states); lcore_id++)
-		__rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		struct rte_rand_state *lcore_state =
+			RTE_LCORE_VAR_LCORE_PTR(lcore_id, rand_state);
+
+		__rte_srand_lfsr258(seed + lcore_id, lcore_state);
+	}
+
+	__rte_srand_lfsr258(seed + lcore_id, &unregistered_rand_state);
 }
 
 static __rte_always_inline uint64_t
@@ -124,11 +129,10 @@ struct rte_rand_state *__rte_rand_get_state(void)
 
 	idx = rte_lcore_id();
 
-	/* last instance reserved for unregistered non-EAL threads */
 	if (unlikely(idx == LCORE_ID_ANY))
-		idx = RTE_MAX_LCORE;
+		return &unregistered_rand_state;
 
-	return &rand_states[idx];
+	return RTE_LCORE_VAR_PTR(rand_state);
 }
 
 uint64_t
@@ -228,6 +232,8 @@ RTE_INIT(rte_rand_init)
 {
 	uint64_t seed;
 
+	RTE_LCORE_VAR_ALLOC(rand_state);
+
 	seed = __rte_random_initial_seed();
 
 	rte_srand(seed);
-- 
2.34.1
[RFC v2 4/5] power: keep per-lcore state in lcore variable
Signed-off-by: Mattias Rönnblom --- lib/power/rte_power_pmd_mgmt.c | 27 ++- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c index 591fc69f36..bb20e564de 100644 --- a/lib/power/rte_power_pmd_mgmt.c +++ b/lib/power/rte_power_pmd_mgmt.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -68,8 +69,8 @@ struct pmd_core_cfg { /**< Number of queues ready to enter power optimized state */ uint64_t sleep_target; /**< Prevent a queue from triggering sleep multiple times */ -} __rte_cache_aligned; -static struct pmd_core_cfg lcore_cfgs[RTE_MAX_LCORE]; +}; +static RTE_LCORE_VAR_HANDLE(struct pmd_core_cfg, lcore_cfgs); static inline bool queue_equal(const union queue *l, const union queue *r) @@ -252,12 +253,11 @@ clb_multiwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg) { - const unsigned int lcore = rte_lcore_id(); struct queue_list_entry *queue_conf = arg; struct pmd_core_cfg *lcore_conf; const bool empty = nb_rx == 0; - lcore_conf = &lcore_cfgs[lcore]; + lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs); /* early exit */ if (likely(!empty)) @@ -317,13 +317,12 @@ clb_pause(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg) { - const unsigned int lcore = rte_lcore_id(); struct queue_list_entry *queue_conf = arg; struct pmd_core_cfg *lcore_conf; const bool empty = nb_rx == 0; uint32_t pause_duration = rte_power_pmd_mgmt_get_pause_duration(); - lcore_conf = &lcore_cfgs[lcore]; + lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs); if (likely(!empty)) /* early exit */ @@ -358,9 +357,8 @@ clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg) { - const unsigned int lcore = rte_lcore_id(); const bool empty = nb_rx == 0; - struct pmd_core_cfg *lcore_conf = &lcore_cfgs[lcore]; + struct pmd_core_cfg *lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs); struct queue_list_entry *queue_conf = arg; if (likely(!empty)) { @@ -518,7 +516,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id, goto end; } - lcore_cfg = &lcore_cfgs[lcore_id]; + lcore_cfg = RTE_LCORE_VAR_LCORE_PTR(lcore_id, lcore_cfgs); /* check if other queues are stopped as well */ ret = cfg_queues_stopped(lcore_cfg); @@ -619,7 +617,7 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id, } /* no need to check queue id as wrong queue id would not be enabled */ - lcore_cfg = &lcore_cfgs[lcore_id]; + lcore_cfg = RTE_LCORE_VAR_LCORE_PTR(lcore_id, lcore_cfgs); /* check if other queues are stopped as well */ ret = cfg_queues_stopped(lcore_cfg); @@ -772,10 +770,13 @@ RTE_INIT(rte_power_ethdev_pmgmt_init) { size_t i; int j; + RTE_LCORE_VAR_ALLOC(lcore_cfgs); + /* initialize all tailqs */ - for (i = 0; i < RTE_DIM(lcore_cfgs); i++) { - struct pmd_core_cfg *cfg = &lcore_cfgs[i]; - TAILQ_INIT(&cfg->head); + for (i = 0; i < RTE_MAX_LCORE; i++) { + struct pmd_core_cfg *lcore_cfg = + RTE_LCORE_VAR_LCORE_PTR(i, lcore_cfgs); + TAILQ_INIT(&lcore_cfg->head); } /* initialize config defaults */ -- 2.34.1
[RFC v2 2/5] eal: add lcore variable test suite
RFC v2: * Improve alignment-related test coverage. Signed-off-by: Mattias Rönnblom --- app/test/meson.build | 1 + app/test/test_lcore_var.c | 408 ++ 2 files changed, 409 insertions(+) create mode 100644 app/test/test_lcore_var.c diff --git a/app/test/meson.build b/app/test/meson.build index 6389ae83ee..93412cce51 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -101,6 +101,7 @@ source_file_deps = { 'test_ipsec_sad.c': ['ipsec'], 'test_kvargs.c': ['kvargs'], 'test_latencystats.c': ['ethdev', 'latencystats', 'metrics'] + sample_packet_forward_deps, +'test_lcore_var.c': [], 'test_lcores.c': [], 'test_link_bonding.c': ['ethdev', 'net_bond', 'net'] + packet_burst_generator_deps + virtual_pmd_deps, diff --git a/app/test/test_lcore_var.c b/app/test/test_lcore_var.c new file mode 100644 index 00..310d32e10d --- /dev/null +++ b/app/test/test_lcore_var.c @@ -0,0 +1,408 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2024 Ericsson AB + */ + +#include +#include +#include + +#include +#include +#include + +#include "test.h" + +#define MIN_LCORES 2 + +RTE_LCORE_VAR_HANDLE(int, test_int); +RTE_LCORE_VAR_HANDLE(char, test_char); +RTE_LCORE_VAR_HANDLE(long, test_long_sized); +RTE_LCORE_VAR_HANDLE(short, test_short); +RTE_LCORE_VAR_HANDLE(long, test_long_sized_aligned); + +struct int_checker_state { + int old_value; + int new_value; + bool success; +}; + +static bool +rand_bool(void) +{ + return rte_rand() & 1; +} + +static void +rand_blk(void *blk, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + ((unsigned char *)blk)[i] = (unsigned char)rte_rand(); +} + +static bool +is_ptr_aligned(const void *ptr, size_t align) +{ + return ptr != NULL ? (uintptr_t)ptr % align == 0 : false; +} + +static int +check_int(void *arg) +{ + struct int_checker_state *state = arg; + + int *ptr = RTE_LCORE_VAR_PTR(test_int); + + bool naturally_aligned = is_ptr_aligned(ptr, sizeof(int)); + + bool equal; + + if (rand_bool()) + equal = RTE_LCORE_VAR_GET(test_int) == state->old_value; + else + equal = *(RTE_LCORE_VAR_PTR(test_int)) == state->old_value; + + state->success = equal && naturally_aligned; + + if (rand_bool()) + RTE_LCORE_VAR_SET(test_int, state->new_value); + else + *ptr = state->new_value; + + return 0; +} + +RTE_LCORE_VAR_INIT(test_int); +RTE_LCORE_VAR_INIT(test_char); +RTE_LCORE_VAR_INIT_SIZE(test_long_sized, 32); +RTE_LCORE_VAR_INIT(test_short); +RTE_LCORE_VAR_INIT_SIZE_ALIGN(test_long_sized_aligned, sizeof(long), + RTE_CACHE_LINE_SIZE); + +static int +test_int_lvar(void) +{ + unsigned int lcore_id; + + struct int_checker_state states[RTE_MAX_LCORE] = {}; + + RTE_LCORE_FOREACH_WORKER(lcore_id) { + struct int_checker_state *state = &states[lcore_id]; + + state->old_value = (int)rte_rand(); + state->new_value = (int)rte_rand(); + + RTE_LCORE_VAR_LCORE_SET(lcore_id, test_int, state->old_value); + } + + RTE_LCORE_FOREACH_WORKER(lcore_id) + rte_eal_remote_launch(check_int, &states[lcore_id], lcore_id); + + rte_eal_mp_wait_lcore(); + + RTE_LCORE_FOREACH_WORKER(lcore_id) { + struct int_checker_state *state = &states[lcore_id]; + + TEST_ASSERT(state->success, "Unexpected value " + "encountered on lcore %d", lcore_id); + + TEST_ASSERT_EQUAL(state->new_value, + RTE_LCORE_VAR_LCORE_GET(lcore_id, test_int), + "Lcore %d failed to update int", lcore_id); + } + + /* take the opportunity to test the foreach macro */ + int *v; + lcore_id = 0; + RTE_LCORE_VAR_FOREACH_VALUE(v, test_int) { + TEST_ASSERT_EQUAL(states[lcore_id].new_value, *v, + "Unexpected value on lcore %d during " + "iteration", 
lcore_id); + lcore_id++; + } + + return TEST_SUCCESS; +} + +static int +test_sized_alignment(void) +{ + long *v; + + RTE_LCORE_VAR_FOREACH_VALUE(v, test_long_sized) { + TEST_ASSERT(is_ptr_aligned(v, alignof(long)), + "Type-derived alignment failed"); + } + + RTE_LCORE_VAR_FOREACH_VALUE(v, test_long_sized_aligned) { + TEST_ASSERT(is_ptr_aligned(v, RTE_CACHE_LINE_SIZE), + "Explicit alignment failed"); + } + + return TEST_SUCCESS; +} + +/* private, larger, struct */ +#define TEST_STRUCT_DATA_SIZE 1234 + +struct test_struct { + uint8_t data[TEST_STRUCT_DATA_SIZE]; +}; + +static RTE_LCORE_VAR_HANDLE(char, before_struct); +static RTE_LCORE_V
[RFC v2 1/5] eal: add static per-lcore memory allocation facility
Introduce DPDK per-lcore id variables, or lcore variables for short. An lcore variable has one value for every current and future lcore id-equipped thread. The primary use case is for statically allocating small chunks of often-used data, which is related logically, but where there are performance benefits to reap from having updates being local to an lcore. Lcore variables are similar to thread-local storage (TLS, e.g., C11 _Thread_local), but decoupling the values' life time with that of the threads. Lcore variables are also similar in terms of functionality provided by FreeBSD kernel's DPCPU_*() family of macros and the associated build-time machinery. DPCPU uses linker scripts, which effectively prevents the reuse of its, otherwise seemingly viable, approach. The currently-prevailing way to solve the same problem as lcore variables is to keep a module's per-lcore data as RTE_MAX_LCORE-sized array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit of lcore variables over this approach is that data related to the same lcore now is close (spatially, in memory), rather than data used by the same module, which in turn avoid excessive use of padding, polluting caches with unused data. RFC v2: * Use alignof to derive alignment requirements. (Morten Brørup) * Change name of FOREACH to make it distinct from 's *per-EAL-thread* RTE_LCORE_FOREACH(). (Morten Brørup) * Allow user-specified alignment, but limit max to cache line size. Signed-off-by: Mattias Rönnblom --- config/rte_config.h | 1 + doc/api/doxy-api-index.md | 1 + lib/eal/common/eal_common_lcore_var.c | 82 ++ lib/eal/common/meson.build| 1 + lib/eal/include/meson.build | 1 + lib/eal/include/rte_lcore_var.h | 374 ++ lib/eal/version.map | 4 + 7 files changed, 464 insertions(+) create mode 100644 lib/eal/common/eal_common_lcore_var.c create mode 100644 lib/eal/include/rte_lcore_var.h diff --git a/config/rte_config.h b/config/rte_config.h index da265d7dd2..884482e473 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -30,6 +30,7 @@ /* EAL defines */ #define RTE_CACHE_GUARD_LINES 1 #define RTE_MAX_HEAPS 32 +#define RTE_MAX_LCORE_VAR 1048576 #define RTE_MAX_MEMSEG_LISTS 128 #define RTE_MAX_MEMSEG_PER_LIST 8192 #define RTE_MAX_MEM_MB_PER_LIST 32768 diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index a6a768bd7c..bb06bb7ca1 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -98,6 +98,7 @@ The public API headers are grouped by topics: [interrupts](@ref rte_interrupts.h), [launch](@ref rte_launch.h), [lcore](@ref rte_lcore.h), + [lcore-varible](@ref rte_lcore_var.h), [per-lcore](@ref rte_per_lcore.h), [service cores](@ref rte_service.h), [keepalive](@ref rte_keepalive.h), diff --git a/lib/eal/common/eal_common_lcore_var.c b/lib/eal/common/eal_common_lcore_var.c new file mode 100644 index 00..dfd11cbd0b --- /dev/null +++ b/lib/eal/common/eal_common_lcore_var.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2024 Ericsson AB + */ + +#include + +#include +#include +#include + +#include + +#include "eal_private.h" + +#define WARN_THRESHOLD 75 + +/* + * Avoid using offset zero, since it would result in a NULL-value + * "handle" (offset) pointer, which in principle and per the API + * definition shouldn't be an issue, but may confuse some tools and + * users. 
+ */ +#define INITIAL_OFFSET 1 + +char rte_lcore_var[RTE_MAX_LCORE][RTE_MAX_LCORE_VAR] __rte_cache_aligned; + +static uintptr_t allocated = INITIAL_OFFSET; + +static void +verify_allocation(uintptr_t new_allocated) +{ + static bool has_warned; + + RTE_VERIFY(new_allocated < RTE_MAX_LCORE_VAR); + + if (new_allocated > (WARN_THRESHOLD * RTE_MAX_LCORE_VAR) / 100 && + !has_warned) { + EAL_LOG(WARNING, "Per-lcore data usage has exceeded %d%% " + "of the maximum capacity (%d bytes)", WARN_THRESHOLD, + RTE_MAX_LCORE_VAR); + has_warned = true; + } +} + +static void * +lcore_var_alloc(size_t size, size_t align) +{ + uintptr_t new_allocated = RTE_ALIGN_CEIL(allocated, align); + + void *offset = (void *)new_allocated; + + new_allocated += size; + + verify_allocation(new_allocated); + + allocated = new_allocated; + + EAL_LOG(DEBUG, "Allocated %"PRIuPTR" bytes of per-lcore data with a " + "%"PRIuPTR"-byte alignment", size, align); + + return offset; +} + +void * +rte_lcore_var_alloc(size_t size, size_t align) +{ + /* Having the per-lcore buffer size aligned on cache lines +* assures as well as having the base pointer aligned on cache +* size assures that aligned offsets also translate to aligned +* pointers across all values. +*/ + RTE_BUILD_BUG_ON(RTE_MAX_LCORE_
[RFC v2 5/5] service: keep per-lcore state in lcore variable
Signed-off-by: Mattias Rönnblom --- lib/eal/common/rte_service.c | 119 --- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index d959c91459..de205c5da5 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -75,7 +76,7 @@ struct core_state { static uint32_t rte_service_count; static struct rte_service_spec_impl *rte_services; -static struct core_state *lcore_states; +static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states); static uint32_t rte_service_library_initialized; int32_t @@ -101,11 +102,12 @@ rte_service_init(void) goto fail_mem; } - lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE, - sizeof(struct core_state), RTE_CACHE_LINE_SIZE); - if (!lcore_states) { - EAL_LOG(ERR, "error allocating core states array"); - goto fail_mem; + if (lcore_states == NULL) + RTE_LCORE_VAR_ALLOC(lcore_states); + else { + struct core_state *cs; + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + memset(cs, 0, sizeof(struct core_state)); } int i; @@ -122,7 +124,6 @@ rte_service_init(void) return 0; fail_mem: rte_free(rte_services); - rte_free(lcore_states); return -ENOMEM; } @@ -136,7 +137,6 @@ rte_service_finalize(void) rte_eal_mp_wait_lcore(); rte_free(rte_services); - rte_free(lcore_states); rte_service_library_initialized = 0; } @@ -286,7 +286,6 @@ rte_service_component_register(const struct rte_service_spec *spec, int32_t rte_service_component_unregister(uint32_t id) { - uint32_t i; struct rte_service_spec_impl *s; SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); @@ -294,9 +293,10 @@ rte_service_component_unregister(uint32_t id) s->internal_flags &= ~(SERVICE_F_REGISTERED); + struct core_state *cs; /* clear the run-bit in all cores */ - for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(UINT64_C(1) << id); + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + cs->service_mask &= ~(UINT64_C(1) << id); memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl)); @@ -454,7 +454,10 @@ rte_service_may_be_active(uint32_t id) return -EINVAL; for (i = 0; i < lcore_count; i++) { - if (lcore_states[ids[i]].service_active_on_lcore[id]) + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states); + + if (cs->service_active_on_lcore[id]) return 1; } @@ -464,7 +467,7 @@ rte_service_may_be_active(uint32_t id) int32_t rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) { - struct core_state *cs = &lcore_states[rte_lcore_id()]; + struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states); struct rte_service_spec_impl *s; SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); @@ -486,8 +489,7 @@ service_runner_func(void *arg) { RTE_SET_USED(arg); uint8_t i; - const int lcore = rte_lcore_id(); - struct core_state *cs = &lcore_states[lcore]; + struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states); rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst); @@ -533,13 +535,16 @@ service_runner_func(void *arg) int32_t rte_service_lcore_may_be_active(uint32_t lcore) { - if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core) + struct core_state *cs = + RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states); + + if (lcore >= RTE_MAX_LCORE || !cs->is_service_core) return -EINVAL; /* Load thread_active using ACQUIRE to avoid instructions dependent on * the result being re-ordered before this load completes. 
*/ - return rte_atomic_load_explicit(&lcore_states[lcore].thread_active, + return rte_atomic_load_explicit(&cs->thread_active, rte_memory_order_acquire); } @@ -547,9 +552,11 @@ int32_t rte_service_lcore_count(void) { int32_t count = 0; - uint32_t i; - for (i = 0; i < RTE_MAX_LCORE; i++) - count += lcore_states[i].is_service_core; + + struct core_state *cs; + RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states) + count += cs->is_service_core; + return count; } @@ -566,7 +573,8 @@ rte_service_lcore_list(uint32_t array[], uint32_t n) uint32_t i; uint32_t idx = 0; for (i = 0; i < RTE_MAX_LCORE; i++) { - struct core_state *cs = &lcore_states[i]; +
Re: [PATCH v8 0/2] add telemetry cmds for ring
> Jie Hai (2):
>   ring: add telemetry cmd to list rings
>   ring: add telemetry cmd for ring info

Applied, thanks.
[PATCH v12] net/iavf: add diagnostic support in TX path
Implemented a Tx wrapper to perform a thorough check on mbufs, categorizing and counting invalid cases by types for diagnostic purposes. The count of invalid cases is accessible through xstats_get. Also, the devarg option "mbuf_check" was introduced to configure the diagnostic parameters to enable the appropriate diagnostic features. supported cases: mbuf, size, segment, offload. 1. mbuf: check for corrupted mbuf. 2. size: check min/max packet length according to hw spec. 3. segment: check number of mbuf segments not exceed hw limitation. 4. offload: check any unsupported offload flag. parameter format: "mbuf_check=" or "mbuf_check=[,]" eg: dpdk-testpmd -a :81:01.0,mbuf_check=[mbuf,size] -- -i Signed-off-by: Mingjin Ye --- v2: Remove call chain. --- v3: Optimisation implementation. --- v4: Fix Windows os compilation error. --- v5: Split Patch. --- v6: remove strict. --- v9: Modify the description document. --- v10: Modify vf rst document. --- v11: modify comment log. --- v12: Fix buggy logs and add necessary notes. --- doc/guides/nics/intel_vf.rst | 11 drivers/net/iavf/iavf.h| 11 drivers/net/iavf/iavf_ethdev.c | 79 +++ drivers/net/iavf/iavf_rxtx.c | 98 ++ drivers/net/iavf/iavf_rxtx.h | 2 + 5 files changed, 201 insertions(+) diff --git a/doc/guides/nics/intel_vf.rst b/doc/guides/nics/intel_vf.rst index ce96c2e1f8..b84ea214d4 100644 --- a/doc/guides/nics/intel_vf.rst +++ b/doc/guides/nics/intel_vf.rst @@ -111,6 +111,17 @@ For more detail on SR-IOV, please refer to the following documents: by setting the ``devargs`` parameter like ``-a 18:01.0,no-poll-on-link-down=1`` when IAVF is backed by an Intel\ |reg| E810 device or an Intel\ |reg| 700 Series Ethernet device. +When IAVF is backed by an Intel?? E810 device or an Intel?? 700 Series Ethernet device. +Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example, +``-a 18:01.0,mbuf_check=`` or ``-a 18:01.0,mbuf_check=[,...]``. Also, +``xstats_get`` can be used to get the error counts, which are collected in ``tx_mbuf_error_packets`` +xstats. For example, ``testpmd> show port xstats all``. Supported cases: + +* mbuf: Check for corrupted mbuf. +* size: Check min/max packet length according to hw spec. +* segment: Check number of mbuf segments not exceed hw limitation. +* offload: Check any unsupported offload flag. + The PCIE host-interface of Intel Ethernet Switch FM1 Series VF infrastructure ^ diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index ab24cb02c3..824ae4aa02 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -114,9 +114,14 @@ struct iavf_ipsec_crypto_stats { } ierrors; }; +struct iavf_mbuf_stats { + uint64_t tx_pkt_errors; +}; + struct iavf_eth_xstats { struct virtchnl_eth_stats eth_stats; struct iavf_ipsec_crypto_stats ips_stats; + struct iavf_mbuf_stats mbuf_stats; }; /* Structure that defines a VSI, associated with a adapter. */ @@ -310,6 +315,7 @@ struct iavf_devargs { uint32_t watchdog_period; int auto_reset; int no_poll_on_link_down; + uint64_t mbuf_check; }; struct iavf_security_ctx; @@ -353,6 +359,11 @@ enum iavf_tx_burst_type { IAVF_TX_AVX512_CTX_OFFLOAD, }; +#define IAVF_MBUF_CHECK_F_TX_MBUF(1ULL << 0) +#define IAVF_MBUF_CHECK_F_TX_SIZE(1ULL << 1) +#define IAVF_MBUF_CHECK_F_TX_SEGMENT (1ULL << 2) +#define IAVF_MBUF_CHECK_F_TX_OFFLOAD (1ULL << 3) + /* Structure to store private data for each VF instance. 
*/ struct iavf_adapter { struct iavf_hw hw; diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 1fb876e827..3fb255d748 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #define IAVF_RESET_WATCHDOG_ARG"watchdog_period" #define IAVF_ENABLE_AUTO_RESET_ARG "auto_reset" #define IAVF_NO_POLL_ON_LINK_DOWN_ARG "no-poll-on-link-down" +#define IAVF_MBUF_CHECK_ARG "mbuf_check" uint64_t iavf_timestamp_dynflag; int iavf_timestamp_dynfield_offset = -1; int rte_pmd_iavf_tx_lldp_dynfield_offset = -1; @@ -49,6 +51,7 @@ static const char * const iavf_valid_args[] = { IAVF_RESET_WATCHDOG_ARG, IAVF_ENABLE_AUTO_RESET_ARG, IAVF_NO_POLL_ON_LINK_DOWN_ARG, + IAVF_MBUF_CHECK_ARG, NULL }; @@ -175,6 +178,7 @@ static const struct rte_iavf_xstats_name_off rte_iavf_stats_strings[] = { {"tx_broadcast_packets", _OFF_OF(eth_stats.tx_broadcast)}, {"tx_dropped_packets", _OFF_OF(eth_stats.tx_discards)}, {"tx_error_packets", _OFF_OF(eth_stats.tx_errors)}, + {"tx_mbuf_error_packets", _OFF_OF(mbuf_stats.t
Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.
On 2/19/2024 2:44 AM, Rushil Gupta wrote:
> This was causing failure for testpmd runs (for queues >=15)
> presumably due to flooding of logs due to descriptor ring being
> overwritten.
>
> Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Rushil Gupta
> Reviewed-by: Joshua Washington
>

Squashed into relevant commit in next-net, thanks.
Re: [PATCH] net/nfp: add support of UDP fragmentation offload
On Sun, Feb 18, 2024 at 11:05:35AM +0100, Morten Brørup wrote:
> > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > Sent: Saturday, 17 February 2024 19.11
> >
> > On Sat, 17 Feb 2024 19:02:30 +0100
> > Morten Brørup wrote:
> >
> > > Not formally... it follows the official DPDK Coding Style [1].
> > >
> > > [1]: https://doc.dpdk.org/guides/contributing/coding_style.html#general
> > >
> > > > Should be:
> > > >
> > > > 	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
> > > > 	    (ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
> > > > 		goto clean_txd;
> > >
> > > This indentation style is mentioned as an alternative in the guide.
> > > But the example in the guide also uses two tabs for a similar long
> > > comparison.
> > >
> > > Personally, I also prefer the style suggested by Stephen, so we might
> > > want to update the Coding Style. ;-)
> >
> > The two tabs is an Intel thing, and I prefer the kernel, line up the
> > conditional style.
>
> I prefer 4 space indentation, which is sufficient to notice the indentation.
> 8 spaces seems overkill to me, and quickly makes the lines too long.
> With the editor configured to show tab as 4 spaces, the kernel alignment
> style ends up with the same indentation for the condition and the code block:
>
> if (a &&
>     b)
>     ctr++;
>
> Whereas with the "tab as 4 spaces" editor configuration, the double
> indentation style clearly separates the continued condition from the code block:
>
> if (a &&
>         b)
>     ctr++;
>

These two above are the main reasons I much prefer the double indent on
continuation, though I'd also add a third one: it means we don't have a mix
of tabs and spaces for indentation.

However, as stated already, indent can be a matter of taste, and there will
be some disagreement about it. The existing coding standards document what
was done in the code base when they were written, and I don't think we
should look to change them. It's a bit annoying having 2 standards for
continuation rather than 1, but it's not exactly a free-for-all, and it's
not something that applies to every line, only to a small subset.

> On the other hand, complex conditions are more easily readable when aligning
> logically instead of by a fixed number of tabs, e.g.:
>
> if (a |
>     (b &
>      (c ^ d)) |
>     (e ^ f) |
>     g)
>     ctr++;
>

Apart from the alignment of the increment at the end, yes, I admit it is a
little more readable. However, I also think that it's still pretty complex
even with the helpers!

> Placing the operators at the beginning also makes the code more readable:
>
> if (a
>     | (b
>        & (c ^ d))
>     | (e ^ f)
>     | g)
>     ctr++;
>

+1 to this. I think having operators at the beginning of lines is good. It
also makes it visually clearer that the indent is for line continuation.

> I guess that coding styles are mostly a matter of taste.
>
> I wonder if any research into coding styles has reached any conclusions or
> recommendations, beyond mixing coding styles being bad for readability.
>
> > We really should have a style that can be described by clang-format.
> > Other projects like VPP have a target that reformats the code and uses
> > one of the clang-format templates.
>
> Automated code formatting seems like a good idea.
>

Yep. The trouble is that, if you don't do this from the start, the deltas
will be massive and confusing in our git history.

/Bruce
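For reference, a rough sketch of what a .clang-format capturing the double-indent continuation style could contain; the option values are illustrative, not an agreed DPDK template:

# Illustrative sketch only, not an agreed DPDK .clang-format.
BasedOnStyle: LLVM
IndentWidth: 8
UseTab: Always
ColumnLimit: 100
ContinuationIndentWidth: 16                # two tabs for wrapped conditions
BreakBeforeBinaryOperators: NonAssignment  # operators lead continuation lines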
Re: [PATCH] net/nfp: add support of UDP fragmentation offload
On 2/19/2024 10:26 AM, Bruce Richardson wrote:
> On Sun, Feb 18, 2024 at 11:05:35AM +0100, Morten Brørup wrote:
>>> From: Stephen Hemminger [mailto:step...@networkplumber.org]
>>> Sent: Saturday, 17 February 2024 19.11
>>>
>>> On Sat, 17 Feb 2024 19:02:30 +0100
>>> Morten Brørup wrote:
>>>
>>>> Not formally... it follows the official DPDK Coding Style [1].
>>>>
>>>> [1]: https://doc.dpdk.org/guides/contributing/coding_style.html#general
>>>>
>>>>> Should be:
>>>>>
>>>>> 	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
>>>>> 	    (ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
>>>>> 		goto clean_txd;
>>>>
>>>> This indentation style is mentioned as an alternative in the guide.
>>>> But the example in the guide also uses two tabs for a similar long
>>>> comparison.
>>>>
>>>> Personally, I also prefer the style suggested by Stephen, so we might
>>>> want to update the Coding Style. ;-)
>>>
>>> The two tabs is an Intel thing, and I prefer the kernel, line up the
>>> conditional style.
>>
>> I prefer 4 space indentation, which is sufficient to notice the indentation.
>> 8 spaces seems overkill to me, and quickly makes the lines too long.
>> With the editor configured to show tab as 4 spaces, the kernel alignment
>> style ends up with the same indentation for the condition and the code block:
>>
>> if (a &&
>>     b)
>>     ctr++;
>>
>> Whereas with the "tab as 4 spaces" editor configuration, the double
>> indentation style clearly separates the continued condition from the code block:
>>
>> if (a &&
>>         b)
>>     ctr++;
>>
>
> These two above are the main reasons I much prefer the double indent on
> continuation, though I'd also add a third one: it means we don't have a mix
> of tabs and spaces for indentation.
>
> However, as stated already, indent can be a matter of taste, and there will
> be some disagreement about it. The existing coding standards document what
> was done in the code base when they were written, and I don't think we
> should look to change them. It's a bit annoying having 2 standards for
> continuation rather than 1, but it's not exactly a free-for-all, and it's
> not something that applies to every line, only to a small subset.
>

++1
Re: [PATCH v2 5/5] vhost: enhance virtqueue access lock asserts
05/12/2023 10:45, David Marchand:
> +#define VHOST_USER_ASSERT_LOCK(dev, vq, id) do { \
> +	RTE_BUILD_BUG_ON(!vhost_message_handlers[id].lock_all_qps); \
> +	vq_assert_lock(dev, vq); \
> +} while (0)

Since "eal: enhance compile-time checks using C11 assert", it is not allowed
to have a non-constant check in RTE_BUILD_BUG_ON:

lib/vhost/vhost_user.c:413:25: note: in expansion of macro 'VHOST_USER_ASSERT_LOCK'
lib/vhost/vhost_user.c: In function 'vhost_user_set_vring_addr':
lib/eal/include/rte_common.h:518:56: error: expression in static assertion is not constant
#define RTE_BUILD_BUG_ON(condition) do { static_assert(!(condition), #condition); } while (0)

I suppose we can make this check at compile-time with a few adjustments.
For -rc1, I am dropping this patch.
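The underlying C rule reduces to a small sketch (the handler table below is made up): C11 static_assert() demands an integer constant expression, and in C an indexed read from a const table does not qualify, even with a literal index:

#include <assert.h>

struct handler { int lock_all_qps; };

static const struct handler handlers[] = { { 1 }, { 0 } };

static_assert(1 + 1 == 2, "fine: an integer constant expression");

/* Does not compile in C: 'handlers[0].lock_all_qps' is a load from a
 * const object, not an integer constant expression, which is why the
 * static_assert-based RTE_BUILD_BUG_ON rejects the vhost check above.
 *
 * static_assert(handlers[0].lock_all_qps, "non-constant");
 */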
RE: [RFC 1/5] eal: add static per-lcore memory allocation facility
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> Sent: Monday, 19 February 2024 08.49
>
> On 2024-02-09 14:04, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> >> Sent: Friday, 9 February 2024 12.46
> >>
> >> On 2024-02-09 09:25, Morten Brørup wrote:
> >>>> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> >>>> Sent: Thursday, 8 February 2024 19.17
> >>>>
> >>>> Introduce DPDK per-lcore id variables, or lcore variables for short.
> >>>>
> >>>> An lcore variable has one value for every current and future lcore
> >>>> id-equipped thread.
> >>>>
> >>>> The primary use case is for statically allocating small chunks of
> >>>> often-used data, which is related logically, but where there are
> >>>> performance benefits to reap from having updates being local to an
> >>>> lcore.
> >>>>
> >>>> Lcore variables are similar to thread-local storage (TLS, e.g., C11
> >>>> _Thread_local), but decoupling the values' life time with that of the
> >>>> threads.
> >>>>
> >>>> Lcore variables are also similar in terms of functionality provided by
> >>>> FreeBSD kernel's DPCPU_*() family of macros and the associated
> >>>> build-time machinery. DPCPU uses linker scripts, which effectively
> >>>> prevents the reuse of its, otherwise seemingly viable, approach.
> >>>>
> >>>> The currently-prevailing way to solve the same problem as lcore
> >>>> variables is to keep a module's per-lcore data as RTE_MAX_LCORE-sized
> >>>> array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit of
> >>>> lcore variables over this approach is that data related to the same
> >>>> lcore now is close (spatially, in memory), rather than data used by
> >>>> the same module, which in turn avoid excessive use of padding,
> >>>> polluting caches with unused data.
> >>>>
> >>>> Signed-off-by: Mattias Rönnblom
> >>>> ---
> >>>
> >>> This looks very promising. :-)
> >>>
> >>> Here's a bunch of comments, questions and suggestions.
> >>>
> >>>
> >>> * Question: Performance.
> >>> What is the cost of accessing an lcore variable vs a variable in TLS?
> >>> I suppose the relative cost diminishes if the variable is a larger
> >>> struct, compared to a simple uint64_t.
> >>>
> >>
> >> In case all the relevant data is available in a cache close to the core,
> >> both options carry quite low overhead.
> >>
> >> Accessing a lcore variable will always require a TLS lookup, in the form
> >> of retrieving the lcore_id of the current thread. In that sense, there
> >> will likely be a number of extra instructions required to do the lcore
> >> variable address lookup (i.e., doing the load from rte_lcore_var table
> >> based on the lcore_id you just looked up, and adding the variable's
> >> offset).
> >>
> >> A TLS lookup will incur an extra overhead of less than a clock cycle,
> >> compared to accessing a non-TLS static variable, in case static linking
> >> is used. For shared objects, TLS is much more expensive (something often
> >> visible in dynamically linked DPDK app flame graphs, in the form of the
> >> __tls_addr symbol). Then you need to add ~3 cc/access. This on a micro
> >> benchmark running on a x86_64 Raptor Lake P-core.
> >>
> >> (To visualize the difference between shared object and not, one can use
> >> Compiler Explorer and -fPIC versus -fPIE.)
> >>
> >> Things get more complicated if you access the same variable in the same
> >> section of code, since then it can be left on the stack/in a register by
> >> the compiler, especially if LTO is used. In other words, if you do
> >> rte_lcore_id() several times in a row, only the first one will cost you
> >> anything. This happens fairly often in DPDK, with rte_lcore_id().
> >>
> >> Finally, if you do something like
> >>
> >> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> >> index af9fffd81b..a65c30d27e 100644
> >> --- a/lib/eal/common/rte_random.c
> >> +++ b/lib/eal/common/rte_random.c
> >> @@ -125,14 +125,7 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
> >>    static __rte_always_inline
> >>    struct rte_rand_state *__rte_rand_get_state(void)
> >>    {
> >> -	unsigned int idx;
> >> -
> >> -	idx = rte_lcore_id();
> >> -
> >> -	if (unlikely(idx == LCORE_ID_ANY))
> >> -		return &unregistered_rand_state;
> >> -
> >> -	return RTE_LCORE_VAR_PTR(rand_state);
> >> +	return &unregistered_rand_state;
> >>    }
> >>
> >>    uint64_t
> >>
> >> ...and re-run the rand_perf_autotest, at least I see no difference at
> >> all (in a statically linked build). Both result in rte_rand() using ~11
> >> cc/call. What that suggests is that TLS overhead is very small, and that
> >> any extra instructions required by lcore variables don't add much, if
> >> anything at all, at least in this particular case.
> >
> > Excellent. Thank you for a thorough and detailed answer, Mattias.
> >
> >>
> >>> Some of my sugg
Re: [PATCH v7 1/2] lib/net: fix parsing of VLAN metadata
On 2/19/2024 9:31 AM, Alan Elder wrote:
> Add common macros for extracting parts of VLAN tag.
>
> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Alan Elder
>

Reviewed-by: Ferruh Yigit
Re: [PATCH v7 2/2] net/netvsc: fix parsing of VLAN metadata
On 2/19/2024 9:31 AM, Alan Elder wrote:
> The previous code incorrectly parsed the VLAN ID and priority.
> If the 16-bits of VLAN ID and priority/CFI on the wire was
> 0123456789ABCDEF the code parsed it as 456789ABCDEF3012. There
> were macros defined to handle this conversion but they were not
> used.
>
> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Alan Elder
>

Acked-by: Ferruh Yigit
Re: [PATCH v7 1/2] lib/net: fix parsing of VLAN metadata
On 2/19/2024 11:12 AM, Ferruh Yigit wrote:
> On 2/19/2024 9:31 AM, Alan Elder wrote:
>> Add common macros for extracting parts of VLAN tag.
>>
>> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Alan Elder
>>
>
> Reviewed-by: Ferruh Yigit
>

Series applied to dpdk-next-net/main, thanks.

Updated the patch title as below while merging:
    net: add helper macros for VLAN metadata parsing

Also kept the fixes and stable tags, although the patch itself is not a fix, to request backporting of the patch and to highlight the reason for the request.
RE: [RFC v2 3/5] random: keep PRNG state in lcore variable
> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> Sent: Monday, 19 February 2024 10.41
>
> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
> same state in a more cache-friendly lcore variable.
>
> Signed-off-by: Mattias Rönnblom
> ---

[...]

> @@ -19,14 +20,12 @@ struct rte_rand_state {
> 	uint64_t z3;
> 	uint64_t z4;
> 	uint64_t z5;
> -	RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
>
> -/* One instance each for every lcore id-equipped thread, and one
> - * additional instance to be shared by all others threads (i.e., all
> - * unregistered non-EAL threads).
> - */
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
> +
> +/* instance to be shared by all unregistered non-EAL threads */
> +static struct rte_rand_state unregistered_rand_state __rte_cache_aligned;

The unregistered_rand_state instance is still __rte_cache_aligned; consider also adding an RTE_CACHE_GUARD to it.
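A sketch of what that suggestion could look like (illustrative only; as the exchange further down in this digest shows, the alignment ended up being dropped instead):

/* Sketch of the suggestion only: RTE_CACHE_GUARD pads out the trailing
 * cache line(s) so no unrelated variable can become a false-sharing
 * neighbour of the unregistered threads' PRNG state.
 */
static struct {
	struct rte_rand_state state;
	RTE_CACHE_GUARD;
} unregistered_rand __rte_cache_aligned;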
Re: [PATCH] net/nfp: add support of UDP fragmentation offload
On 2/17/2024 4:47 PM, Stephen Hemminger wrote:
> On Sat, 17 Feb 2024 09:54:10 +0800
> Chaoyong He wrote:
>
>> Add the support of UDP fragmentation offload feature.
>>
>> Signed-off-by: Chaoyong He
>> Reviewed-by: Long Wu
>> Reviewed-by: Peng Zhang
>> ---
>>  drivers/common/nfp/nfp_common_ctrl.h | 1 +
>>  drivers/net/nfp/nfd3/nfp_nfd3_dp.c   | 7 ++-
>>  drivers/net/nfp/nfdk/nfp_nfdk_dp.c   | 8 +---
>>  drivers/net/nfp/nfp_net_common.c     | 8 ++--
>>  4 files changed, 18 insertions(+), 6 deletions(-)
>
> Looks like this depends on an earlier patch; it does not apply directly to
> the main branch.
>

It is a drivers/net patch, so it should be on top of next-net, and it applies cleanly to next-net. CI also detects the sub-tree correctly and is able to apply and test the patch.
Re: [PATCH] net/nfp: add support of UDP fragmentation offload
On 2/17/2024 1:54 AM, Chaoyong He wrote:
> Add the support of UDP fragmentation offload feature.
>
> Signed-off-by: Chaoyong He
> Reviewed-by: Long Wu
> Reviewed-by: Peng Zhang

<...>

> diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
> index 72c9a41b00..99c319eb2d 100644
> --- a/drivers/net/nfp/nfp_net_common.c
> +++ b/drivers/net/nfp/nfp_net_common.c
> @@ -312,7 +312,7 @@ nfp_net_log_device_information(const struct nfp_net_hw *hw)
> 		hw->ver.major, hw->ver.minor, hw->max_mtu);
>
> 	PMD_INIT_LOG(INFO, "CAP: %#x", cap);
> -	PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
> +	PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
>

This seems to be getting out of hand. I assume this is done like this (instead of multiple print lines) to prevent line breaks; if so, what do you think about adding a new macro that doesn't append '\n' automatically, and refactoring this code (in a separate patch)?
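One possible shape for such a macro, sketched here for illustration (the macro name, logtype variable and format prefix are made up; a real refactor would follow the driver's existing logging conventions):

/* Sketch only: like the driver's init-stage log macro, but without an
 * automatic trailing newline, so one long line can be emitted in parts.
 */
#define PMD_INIT_LOG_NONL(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, nfp_logtype_init, \
		"%s(): " fmt, __func__, ## __VA_ARGS__)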
Re: [RFC v2 3/5] random: keep PRNG state in lcore variable
On 2024-02-19 12:22, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
>> Sent: Monday, 19 February 2024 10.41
>>
>> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
>> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
>> same state in a more cache-friendly lcore variable.
>>
>> Signed-off-by: Mattias Rönnblom
>> ---
>
> [...]
>
>> @@ -19,14 +20,12 @@ struct rte_rand_state {
>> 	uint64_t z3;
>> 	uint64_t z4;
>> 	uint64_t z5;
>> -	RTE_CACHE_GUARD;
>> -} __rte_cache_aligned;
>> +};
>>
>> -/* One instance each for every lcore id-equipped thread, and one
>> - * additional instance to be shared by all others threads (i.e., all
>> - * unregistered non-EAL threads).
>> - */
>> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
>> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
>> +
>> +/* instance to be shared by all unregistered non-EAL threads */
>> +static struct rte_rand_state unregistered_rand_state __rte_cache_aligned;
>
> The unregistered_rand_state instance is still __rte_cache_aligned; consider
> also adding an RTE_CACHE_GUARD to it.

It shouldn't be cache-line aligned. I'll remove it. Thanks.
Re: [PATCH] net/nfp: add support of UDP fragmentation offload
On 2/17/2024 1:54 AM, Chaoyong He wrote: > Add the support of UDP fragmentation offload feature. > > Signed-off-by: Chaoyong He > Reviewed-by: Long Wu > Reviewed-by: Peng Zhang > Applied to dpdk-next-net/main, thanks.
Re: [RFC 1/5] eal: add static per-lcore memory allocation facility
On 2024-02-19 12:10, Morten Brørup wrote: From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] Sent: Monday, 19 February 2024 08.49 On 2024-02-09 14:04, Morten Brørup wrote: From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] Sent: Friday, 9 February 2024 12.46 On 2024-02-09 09:25, Morten Brørup wrote: From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com] Sent: Thursday, 8 February 2024 19.17 Introduce DPDK per-lcore id variables, or lcore variables for short. An lcore variable has one value for every current and future lcore id-equipped thread. The primary use case is for statically allocating small chunks of often-used data, which is related logically, but where there are performance benefits to reap from having updates being local to an lcore. Lcore variables are similar to thread-local storage (TLS, e.g., C11 _Thread_local), but decoupling the values' life time with that of the threads. Lcore variables are also similar in terms of functionality provided by FreeBSD kernel's DPCPU_*() family of macros and the associated build-time machinery. DPCPU uses linker scripts, which effectively prevents the reuse of its, otherwise seemingly viable, approach. The currently-prevailing way to solve the same problem as lcore variables is to keep a module's per-lcore data as RTE_MAX_LCORE- sized array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit of lcore variables over this approach is that data related to the same lcore now is close (spatially, in memory), rather than data used by the same module, which in turn avoid excessive use of padding, polluting caches with unused data. Signed-off-by: Mattias Rönnblom --- This looks very promising. :-) Here's a bunch of comments, questions and suggestions. * Question: Performance. What is the cost of accessing an lcore variable vs a variable in TLS? I suppose the relative cost diminishes if the variable is a larger struct, compared to a simple uint64_t. In case all the relevant data is available in a cache close to the core, both options carry quite low overhead. Accessing a lcore variable will always require a TLS lookup, in the form of retrieving the lcore_id of the current thread. In that sense, there will likely be a number of extra instructions required to do the lcore variable address lookup (i.e., doing the load from rte_lcore_var table based on the lcore_id you just looked up, and adding the variable's offset). A TLS lookup will incur an extra overhead of less than a clock cycle, compared to accessing a non-TLS static variable, in case static linking is used. For shared objects, TLS is much more expensive (something often visible in dynamically linked DPDK app flame graphs, in the form of the __tls_addr symbol). Then you need to add ~3 cc/access. This on a micro benchmark running on a x86_64 Raptor Lake P-core. (To visialize the difference between shared object and not, one can use Compiler Explorer and -fPIC versus -fPIE.) Things get more complicated if you access the same variable in the same section code, since then it can be left on the stack/in a register by the compiler, especially if LTO is used. In other words, if you do rte_lcore_id() several times in a row, only the first one will cost you anything. This happens fairly often in DPDK, with rte_lcore_id(). 
Finally, if you do something like diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c index af9fffd81b..a65c30d27e 100644 --- a/lib/eal/common/rte_random.c +++ b/lib/eal/common/rte_random.c @@ -125,14 +125,7 @@ __rte_rand_lfsr258(struct rte_rand_state *state) static __rte_always_inline struct rte_rand_state *__rte_rand_get_state(void) { - unsigned int idx; - - idx = rte_lcore_id(); - - if (unlikely(idx == LCORE_ID_ANY)) - return &unregistered_rand_state; - - return RTE_LCORE_VAR_PTR(rand_state); + return &unregistered_rand_state; } uint64_t ...and re-run the rand_perf_autotest, at least I see no difference at all (in a statically linked build). Both result in rte_rand() using ~11 cc/call. What that suggests is that TLS overhead is very small, and that any extra instructions required by lcore variables don't add much, if anything at all, at least in this particular case. Excellent. Thank you for a thorough and detailed answer, Mattias. Some of my suggestions below might also affect performance. * Advantage: Provides direct access to worker thread variables. With the current alternative (thread-local storage), the main thread cannot access the TLS variables of the worker threads, unless worker threads publish global access pointers. Lcore variables of any lcore thread can be directly accessed by any thread, which simplifies code. * Advantage: Roadmap towards hugemem. It would be nice if the lcore variable memory was allocated in hugemem, to reduce TLB misses. The current alternative (thread-local stor
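As a conceptual sketch of the address calculation discussed above (illustrative only, not the RFC's actual implementation; 'lcore_var_buffer', 'lcore_var_ptr' and MAX_LCORE_VAR_SIZE are made-up names):

/* One flat per-lcore buffer; each lcore variable lives at a fixed
 * offset handed out at allocation time. MAX_LCORE_VAR_SIZE is a
 * placeholder for whatever capacity the allocator reserves.
 */
static char lcore_var_buffer[RTE_MAX_LCORE][MAX_LCORE_VAR_SIZE];

static inline void *
lcore_var_ptr(size_t offset)
{
	unsigned int lcore_id = rte_lcore_id(); /* the TLS lookup */

	/* table load indexed by lcore id, plus the variable's offset */
	return &lcore_var_buffer[lcore_id][offset];
}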
RE: [RFC 1/5] eal: add static per-lcore memory allocation facility
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] > Sent: Monday, 19 February 2024 15.32 > > On 2024-02-19 12:10, Morten Brørup wrote: > >> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] > >> Sent: Monday, 19 February 2024 08.49 > >> > >> On 2024-02-09 14:04, Morten Brørup wrote: > From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] > Sent: Friday, 9 February 2024 12.46 > > On 2024-02-09 09:25, Morten Brørup wrote: > >> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com] > >> Sent: Thursday, 8 February 2024 19.17 > >> > >> Introduce DPDK per-lcore id variables, or lcore variables for > >> short. > >> > >> An lcore variable has one value for every current and future > lcore > >> id-equipped thread. > >> > >> The primary use case is for statically > >> allocating > >> small chunks of often-used data, which is related logically, but > where > >> there are performance benefits to reap from having updates being > local > >> to an lcore. > >> > >> Lcore variables are similar to thread-local storage (TLS, e.g., > >> C11 > >> _Thread_local), but decoupling the values' life time with that > of > the > >> threads. > >> > >> Lcore variables are also similar in terms of functionality > >> provided > by > >> FreeBSD kernel's DPCPU_*() family of macros and the associated > >> build-time machinery. DPCPU uses linker scripts, which > effectively > >> prevents the reuse of its, otherwise seemingly viable, approach. > >> > >> The currently-prevailing way to solve the same problem as lcore > >> variables is to keep a module's per-lcore data as RTE_MAX_LCORE- > sized > >> array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit > of > >> lcore variables over this approach is that data related to the > >> same > >> lcore now is close (spatially, in memory), rather than data used > >> by > >> the same module, which in turn avoid excessive use of padding, > >> polluting caches with unused data. > >> > >> Signed-off-by: Mattias Rönnblom > >> --- [...] > > Ups... wrong reference! I meant to refer to _lcore_id, not > _thread_id. Correction: > > > > OK. I subconsciously ignored this mistake, and read it as "_lcore_id". :-) [...] > >> For DPDK modules using lcore variables and which treat unregistered > >> threads as "full citizens", I expect special handling of > unregistered > >> threads to be the norm. Take rte_random.h as an example. Current API > >> does not guarantee MT safety for concurrent calls of unregistered > >> threads. It probably should, and it should probably be by means of a > >> mutex (not spinlock). > >> > >> The reason I'm not running off to make a rte_random.c patch is > that's > >> it's unclear to me what is the role of unregistered threads in DPDK. > >> I'm > >> reasonably comfortable with a model where there are many threads > that > >> basically don't interact with the DPDK APIs (except maybe some very > >> narrow exposure, like the preemption-safe ring variant). One example > of > >> such a design would be big slow control plane which uses multi- > >> threading > >> and the Linux process scheduler for work scheduling, hosted in the > same > >> process as a DPDK data plane app. > >> > >> What I find more strange is a scenario where there are unregistered > >> threads which interacts with a wide variety of DPDK APIs, does so > >> at-high-rates/with-high-performance-requirements and are expected to > be > >> preemption-safe. So they are basically EAL threads without a lcore > id. > > > > Yes, this is happening in the wild. > > E.g. 
our application has a mode where it uses fewer EAL threads, and > processes more in non-EAL threads. So to say, the same work is > processed either by an EAL thread or a non-EAL thread, depending on the > application's mode. > > The extra array entry would be useful for such use cases. > > > > Is there some particular reason you can't register those non-EAL > threads? Legacy. I suppose we could just do that instead. Thanks for the suggestion! > > >> > >> Support for that latter scenario has also been voiced, in previous > >> discussions, from what I recall. > >> > >> I think it's hard to answer the question of a "unregistered thread > >> spare" for lcore variables without first knowing what the future > should > >> look like for unregistered threads in DPDK, in terms of being able > to > >> call into DPDK APIs, preemption-safety guarantees, etc. > >> > >> It seems that until you have a clearer picture of how generally to > >> treat > >> unregistered threads, you are best off with just a per-lcore id > >> instance > >> of lcore variables. > > > > I get your point. It also reduces the risk of bugs caused by > incorrect use of the additional entry. > > > > I am arguing for a different angle: Providing the extra entry will > help uncovering relevant use cases. > > > > Maybe
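For the record, registering such a legacy thread is a small change; a sketch using the existing EAL calls (error handling reduced to the bare minimum):

/* In the non-EAL thread, before it starts calling DPDK APIs: */
if (rte_thread_register() != 0)
	rte_panic("no free lcore id (RTE_MAX_LCORE exhausted)\n");

/* ... the thread now has an lcore id and its own per-lcore state ... */

rte_thread_unregister(); /* release the lcore id before thread exit */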
RE: [RFC v2 3/5] random: keep PRNG state in lcore variable
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se] > Sent: Monday, 19 February 2024 15.04 > > On 2024-02-19 12:22, Morten Brørup wrote: > >> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com] > >> Sent: Monday, 19 February 2024 10.41 > >> > >> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of > >> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping > the > >> same state in a more cache-friendly lcore variable. > >> > >> Signed-off-by: Mattias Rönnblom > >> --- > > > > [...] > > > >> @@ -19,14 +20,12 @@ struct rte_rand_state { > >>uint64_t z3; > >>uint64_t z4; > >>uint64_t z5; > >> - RTE_CACHE_GUARD; > >> -} __rte_cache_aligned; > >> +}; > >> > >> -/* One instance each for every lcore id-equipped thread, and one > >> - * additional instance to be shared by all others threads (i.e., > all > >> - * unregistered non-EAL threads). > >> - */ > >> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1]; > >> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state); > >> + > >> +/* instance to be shared by all unregistered non-EAL threads */ > >> +static struct rte_rand_state unregistered_rand_state > >> __rte_cache_aligned; > > > > The unregistered_rand_state instance is still __rte_cache_aligned; > consider also adding an RTE_CACHE_GUARD to it. > > > > It shouldn't be cache-line aligned. I'll remove it. Thanks. Agreed; that fix is just as good. Then, Acked-by: Morten Brørup
Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications
On 2/16/2024 5:07 PM, Andrew Boyer wrote: > Add support for running DPDK applications directly on AMD Pensando > embedded HW. The platform exposes the device BARs through UIO. The > UIO code in the common/ionic library walks the sysfs filesystem > to identify the relevant BARs and map them into process memory. > > The SoCs are named 'Capri' and 'Elba'. > > The vdev device interface code is located in ionic_dev_vdev.c. > > Some datapath operations are #ifdef-ed out to save on resources when > running in embedded mode. > > Some controlpath operations are skipped by the ionic_is_embedded() > helper function. > > Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal > barrier inside rte_write64() is insufficient on these devices due to > a chip errata. > > Signed-off-by: Andrew Boyer > Signed-off-by: Neel Patel > Signed-off-by: R Mohamed Shah > Signed-off-by: Alfredo Cardigliano <...> > +static struct rte_vdev_driver rte_vdev_ionic_pmd = { > + .probe = eth_ionic_vdev_probe, > + .remove = eth_ionic_vdev_remove, > +}; > + > +RTE_PMD_REGISTER_VDEV(net_ionic, rte_vdev_ionic_pmd); > + > +static void > +vdev_ionic_scan_cb(__rte_unused void *arg) > +{ > + ionic_uio_scan_mnet_devices(); > +} > + > +RTE_INIT(vdev_ionic_custom_add) > +{ > + rte_vdev_add_custom_scan(vdev_ionic_scan_cb, NULL); > +} Hi Andrew, My understanding is that 'rte_vdev_add_custom_scan()' is for adding a vdev automatically (via rte_devargs_add()) before the vdev scan starts. As far as I can see you are not doing this, so why is the callback added? Why not call 'ionic_uio_scan_mnet_devices()' within 'eth_ionic_vdev_probe()'?
Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.
On Mon, 19 Feb 2024 02:44:35 + Rushil Gupta wrote: > This was causing failure for testpmd runs (for queues >=15) > presumably due to flooding of logs due to descriptor ring being > overwritten. > > Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors") > Cc: sta...@dpdk.org > > Signed-off-by: Rushil Gupta > Reviewed-by: Joshua Washington Isn't this still an error? What about the overwritten descriptor: is there an mbuf leak? Maybe a statistic would be better than a message?
Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash
Hi Abdullah, Could you please tell more about use cases where this API may be useful? a new API to get the hidden key count in the hash table if the rcu qsbr is enabled Here in commit message and down below in doxygen comments, I think this statement should be more specific because rcu can be created with RTE_HASH_QSBR_MODE_SYNC mode i.e. without defer queue. Also, new API must be reflected in release notes On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote: This patch introduce a new API to get the hidden key count in the hash table if the rcu qsbr is enabled. When using rte_hash_count with rcu qsbr enabled, it will return the number of elements that are not in the free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of elements in the defer queue will not be counted and freed. Therefore I added a new API to get the number of hidden (defer queue) elements in the hash table. Then the user can calculate the total number of elements that are available in the hash table. Signed-off-by: Abdullah Ömer Yamaç --- Cc: Honnappa Nagarahalli Cc: Yipeng Wang Cc: Sameh Gobriel Cc: Bruce Richardson Cc: Vladimir Medvedkin --- lib/hash/rte_cuckoo_hash.c | 9 + lib/hash/rte_hash.h| 13 + lib/hash/version.map | 1 + lib/rcu/rte_rcu_qsbr.c | 8 lib/rcu/rte_rcu_qsbr.h | 11 +++ lib/rcu/version.map| 1 + 6 files changed, 43 insertions(+) diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 70456754c4..3553f3efc7 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h) return h->entries; } +int32_t +rte_hash_dq_count(const struct rte_hash *h) +{ + if (h->dq == NULL) input arguments must be checked since this is a public API, the same is true for rte_rcu_qsbr_dq_count() + return -EINVAL; why not just return 0? + + return rte_rcu_qsbr_dq_count(h->dq); +} + int32_t rte_hash_count(const struct rte_hash *h) { diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h index 7ecc02..8ea97e297d 100644 --- a/lib/hash/rte_hash.h +++ b/lib/hash/rte_hash.h @@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h); void rte_hash_reset(struct rte_hash *h); + +/** + * Return the number of records in the defer queue of the hash table + * if RCU is enabled. + * @param h + * Hash table to query from + * @return + * - -EINVAL if parameters are invalid + * - A value indicating how many records were inserted in the table. did you mean how many records are kept in defer queue? + */ +int32_t +rte_hash_dq_count(const struct rte_hash *h); + /** * Return the number of keys in the hash table * @param h diff --git a/lib/hash/version.map b/lib/hash/version.map index 6b2afebf6b..7f7b158cf1 100644 --- a/lib/hash/version.map +++ b/lib/hash/version.map @@ -9,6 +9,7 @@ DPDK_24 { rte_hash_add_key_with_hash; rte_hash_add_key_with_hash_data; rte_hash_count; + rte_hash_dq_count; new API must introduced as an experimental API. The same is true for rte_rcu_qsbr_dq_count() rte_hash_crc32_alg; rte_hash_crc_set_alg; rte_hash_create; diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c index bd0b83be0c..89f8da4c4c 100644 --- a/lib/rcu/rte_rcu_qsbr.c +++ b/lib/rcu/rte_rcu_qsbr.c @@ -450,6 +450,14 @@ rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n, return 0; } +/** + * Return the number of entries in a defer queue. + */ +unsigned int rte_rcu_qsbr_dq_count(struct rte_rcu_qsbr_dq *dq) +{ + return rte_ring_count(dq->r); +} + /* Delete a defer queue. 
*/ int rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h index 23c9f89805..ed5a590edd 100644 --- a/lib/rcu/rte_rcu_qsbr.h +++ b/lib/rcu/rte_rcu_qsbr.h @@ -794,6 +794,17 @@ int rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n, unsigned int *freed, unsigned int *pending, unsigned int *available); +/** + * Return the number of entries in a defer queue. + * + * @param dq + * Defer queue. + * @return + * The number of entries in the defer queue. + */ +unsigned int +rte_rcu_qsbr_dq_count(struct rte_rcu_qsbr_dq *dq); + /** * Delete a defer queue. * diff --git a/lib/rcu/version.map b/lib/rcu/version.map index 982ffd59d9..f410ab41e7 100644 --- a/lib/rcu/version.map +++ b/lib/rcu/version.map @@ -5,6 +5,7 @@ DPDK_24 { rte_rcu_qsbr_dq_create; rte_rcu_qsbr_dq_delete; rte_rcu_qsbr_dq_enqueue; + rte_rcu_qsbr_dq_count; rte_rcu_qsbr_dq_reclaim; rte_rcu_qsbr_dump; rte_rcu_qsbr_get_memsize; -- Regards, Vladimir
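Folding the two review comments above together, a sketch of what the revised function could look like (whether a missing defer queue should mean 0 or -EINVAL is still the open question raised above):

int32_t
rte_hash_dq_count(const struct rte_hash *h)
{
	/* public API: validate input before dereferencing */
	if (h == NULL)
		return -EINVAL;

	/* no defer queue configured: nothing is hidden */
	if (h->dq == NULL)
		return 0;

	return rte_rcu_qsbr_dq_count(h->dq);
}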
Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash
Hello, Let me explain a use case; I have a hash table whose key value is IP addresses, and data (let's say the username of the IP) is related to the IP address. The key point is matching these data with flows. Flows are dynamic, and this hash table is dynamic, as well; both can change anytime. For example, when a flow starts, we look up the hash table with the corresponding IP and retrieve the username. We need to hold this username until the flow terminates, although we removed this IP key from the hash table (multithread). That's why we have RCU and defer queue is necessary for high performance. In my application, I need to know the number of IP-username entries. These numbers can be calculated by rte_hash_count - defer queue size. I think if you need a non-blocking and multithreaded hash table, an RCU-enabled hash table is necessary. Also, this API is necessary if you need to get the actual matchable size. On Mon, Feb 19, 2024 at 8:36 PM Medvedkin, Vladimir < vladimir.medved...@intel.com> wrote: > Hi Abdullah, > > Could you please tell more about use cases where this API may be useful? > > >a new API to get the hidden key count in the hash table if the rcu qsbr > is enabled > > Here in commit message and down below in doxygen comments, I think this > statement should be more specific because rcu can be created with > RTE_HASH_QSBR_MODE_SYNC mode i.e. without defer queue. > > Also, new API must be reflected in release notes > > On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote: > > This patch introduce a new API to get the hidden key count in the hash > > table if the rcu qsbr is enabled. When using rte_hash_count with rcu > > qsbr enabled, it will return the number of elements that are not in the > > free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of > > elements in the defer queue will not be counted and freed. Therefore I > > added a new API to get the number of hidden (defer queue) elements > > in the hash table. Then the user can calculate the total number of > > elements that are available in the hash table. > > > > Signed-off-by: Abdullah Ömer Yamaç > > > > --- > > Cc: Honnappa Nagarahalli > > Cc: Yipeng Wang > > Cc: Sameh Gobriel > > Cc: Bruce Richardson > > Cc: Vladimir Medvedkin > > --- > > lib/hash/rte_cuckoo_hash.c | 9 + > > lib/hash/rte_hash.h| 13 + > > lib/hash/version.map | 1 + > > lib/rcu/rte_rcu_qsbr.c | 8 > > lib/rcu/rte_rcu_qsbr.h | 11 +++ > > lib/rcu/version.map| 1 + > > 6 files changed, 43 insertions(+) > > > > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c > > index 70456754c4..3553f3efc7 100644 > > --- a/lib/hash/rte_cuckoo_hash.c > > +++ b/lib/hash/rte_cuckoo_hash.c > > @@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h) > > return h->entries; > > } > > > > +int32_t > > +rte_hash_dq_count(const struct rte_hash *h) > > +{ > > + if (h->dq == NULL) > input arguments must be checked since this is a public API, the same is > true for rte_rcu_qsbr_dq_count() > > + return -EINVAL; > why not just return 0? > > + > > + return rte_rcu_qsbr_dq_count(h->dq); > > +} > > + > > int32_t > > rte_hash_count(const struct rte_hash *h) > > { > > diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h > > index 7ecc02..8ea97e297d 100644 > > --- a/lib/hash/rte_hash.h > > +++ b/lib/hash/rte_hash.h > > @@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h); > > void > > rte_hash_reset(struct rte_hash *h); > > > > + > > +/** > > + * Return the number of records in the defer queue of the hash table > > + * if RCU is enabled. 
> > + * @param h > > + * Hash table to query from > > + * @return > > + * - -EINVAL if parameters are invalid > > + * - A value indicating how many records were inserted in the table. > did you mean how many records are kept in defer queue? > > + */ > > +int32_t > > +rte_hash_dq_count(const struct rte_hash *h); > > + > > /** > >* Return the number of keys in the hash table > >* @param h > > diff --git a/lib/hash/version.map b/lib/hash/version.map > > index 6b2afebf6b..7f7b158cf1 100644 > > --- a/lib/hash/version.map > > +++ b/lib/hash/version.map > > @@ -9,6 +9,7 @@ DPDK_24 { > > rte_hash_add_key_with_hash; > > rte_hash_add_key_with_hash_data; > > rte_hash_count; > > + rte_hash_dq_count; > new API must introduced as an experimental API. The same is true for > rte_rcu_qsbr_dq_count() > > rte_hash_crc32_alg; > > rte_hash_crc_set_alg; > > rte_hash_create; > > diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c > > index bd0b83be0c..89f8da4c4c 100644 > > --- a/lib/rcu/rte_rcu_qsbr.c > > +++ b/lib/rcu/rte_rcu_qsbr.c > > @@ -450,6 +450,14 @@ rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, > unsigned int n, > > return 0; > > } > > > > +/** > > + * Return the number of entries in a defer queue. > > + */ > > +unsigned int rt
Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications
On Feb 19, 2024, at 10:24 AM, Yigit, Ferruh wrote: On 2/16/2024 5:07 PM, Andrew Boyer wrote: Add support for running DPDK applications directly on AMD Pensando embedded HW. The platform exposes the device BARs through UIO. The UIO code in the common/ionic library walks the sysfs filesystem to identify the relevant BARs and map them into process memory. The SoCs are named 'Capri' and 'Elba'. The vdev device interface code is located in ionic_dev_vdev.c. Some datapath operations are #ifdef-ed out to save on resources when running in embedded mode. Some controlpath operations are skipped by the ionic_is_embedded() helper function. Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal barrier inside rte_write64() is insufficient on these devices due to a chip errata. Signed-off-by: Andrew Boyer Signed-off-by: Neel Patel Signed-off-by: R Mohamed Shah Signed-off-by: Alfredo Cardigliano <...> +static struct rte_vdev_driver rte_vdev_ionic_pmd = { + .probe = eth_ionic_vdev_probe, + .remove = eth_ionic_vdev_remove, +}; + +RTE_PMD_REGISTER_VDEV(net_ionic, rte_vdev_ionic_pmd); + +static void +vdev_ionic_scan_cb(__rte_unused void *arg) +{ + ionic_uio_scan_mnet_devices(); +} + +RTE_INIT(vdev_ionic_custom_add) +{ + rte_vdev_add_custom_scan(vdev_ionic_scan_cb, NULL); +} Hi Andrew, My understanding is that 'rte_vdev_add_custom_scan()' is for adding a vdev automatically (via rte_devargs_add()) before the vdev scan starts. As far as I can see you are not doing this, so why is the callback added? Why not call 'ionic_uio_scan_mnet_devices()' within 'eth_ionic_vdev_probe()'? I think you are correct and our scan should be restricted to the vdev_probe function. Otherwise it will run in all cases, even on irrelevant hardware. Thanks! Andrew
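A sketch of the agreed direction (the probe body is abbreviated; only the scan placement is the point here), after which the vdev_ionic_scan_cb() callback and the RTE_INIT(vdev_ionic_custom_add) block could be dropped:

static int
eth_ionic_vdev_probe(struct rte_vdev_device *vdev)
{
	/* Scan for UIO-exposed BARs only when an ionic vdev is actually
	 * being probed, rather than from a callback that runs on every
	 * vdev scan, including on unrelated hardware.
	 */
	ionic_uio_scan_mnet_devices();

	/* ... rest of the existing probe logic unchanged ... */
	return 0;
}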
RE: [PATCH 2/3] net/ionic: remove duplicate barriers
Hi Andrew, I think that this barrier may have been added to ensure any writes to q->hw_index and q->head_idx complete before ionic_q_flush computes val. Dependency chains can also prevent reordering; if that's the case, this barrier is not required. However, I have the following concern. > diff --git a/drivers/net/ionic/ionic_main.c b/drivers/net/ionic/ionic_main.c > index 1f24f64a33..814bb3b8f4 100644 > --- a/drivers/net/ionic/ionic_main.c > +++ b/drivers/net/ionic/ionic_main.c > @@ -223,7 +223,6 @@ ionic_adminq_post(struct ionic_lif *lif, struct > ionic_admin_ctx *ctx) > q->head_idx = Q_NEXT_TO_POST(q, 1); > > /* Ring doorbell */ > - rte_wmb(); > ionic_q_flush(q); > > err_out: ionic_q_flush(q) uses q->hw_index and q->head_idx to compute the value of val which it writes to the address stored in q->db. I can see that q->head_idx is updated before the removed rte_wmb(), therefore it's safe to assume that " q->head_idx = Q_NEXT_TO_POST(q, 1)" and "val = IONIC_DBELL_QID(q->hw_index) | q->head_idx" will maintain the program order due to that dependency. But I don't know if there exists a dependency chain over q->hw_index. If not, then that may have been the motive behind this barrier. > diff --git a/drivers/net/ionic/ionic_rxtx_sg.c > b/drivers/net/ionic/ionic_rxtx_sg.c > index e8dec99c04..3820fd850f 100644 > --- a/drivers/net/ionic/ionic_rxtx_sg.c > +++ b/drivers/net/ionic/ionic_rxtx_sg.c > @@ -218,7 +218,6 @@ ionic_xmit_pkts_sg(void *tx_queue, struct rte_mbuf > **tx_pkts, > } > > if (nb_tx > 0) { > - rte_wmb(); > ionic_txq_flush(q); > > txq->last_wdog_cycles = rte_get_timer_cycles(); diff --git > a/drivers/net/ionic/ionic_rxtx_simple.c > b/drivers/net/ionic/ionic_rxtx_simple.c > index 9674b4d1df..4c9b9415ad 100644 > --- a/drivers/net/ionic/ionic_rxtx_simple.c > +++ b/drivers/net/ionic/ionic_rxtx_simple.c > @@ -191,7 +191,6 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, > } > > if (nb_tx > 0) { > - rte_wmb(); > ionic_txq_flush(q); > > txq->last_wdog_cycles = rte_get_timer_cycles(); > -- > 2.17.1
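For reference, the doorbell path in question looks roughly like this (paraphrased from the driver; the comment restates the dependency argument above):

static inline void
ionic_q_flush(struct ionic_queue *q)
{
	/* Both q->hw_index and q->head_idx are written by the same CPU
	 * earlier in program order, so 'val' is computed from the
	 * updated values; the open question is only whether anything
	 * orders the store to q->db with respect to other agents.
	 */
	uint64_t val = IONIC_DBELL_QID(q->hw_index) | q->head_idx;

	rte_write64(rte_cpu_to_le_64(val), q->db);
}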
Re: [V1 0/5] support VXLAN-GPE header fields(flags, rsvd0 and rsvd1) matching
19/02/2024 20:50, Stephen Hemminger: > On Fri, 12 Jan 2024 10:02:05 +0200 > Gavin Li wrote: > > > Previously, VXLAN-GPE in DPDK only supports VNI and next protocol header > > fields. This patch series adds support for flags and reserved field 0 and > > 1. > > > > Below is the VXLAN-GPE header defined in the latest draft. > > 0 1 2 3 > > 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > >|R|R|Ver|I|P|B|O| Reserved|Next Protocol | > >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > >|VXLAN Network Identifier (VNI) | Reserved| > >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > > Would recommend against implementing anything in a draft RFC. > Things can change. Learned the hard way when doing VXLAN driver for Linux. > The hardcoded port value in the Linux VXLAN driver is wrong because it matched > the draft RFC (got changed in final version). Because of strict compatibility > requirements the Linux driver could not be changed to the correct value. The problem is that standardization may be slow. Would it be acceptable without any compatibility guarantee?
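For context, on the application side the new matching would look something like this (a sketch against the existing rte_flow item; the P-bit value 0x04 follows the flags layout in the header above):

/* Match VXLAN-GPE packets with the P (Next Protocol present) bit set. */
struct rte_flow_item_vxlan_gpe spec = { .flags = 0x04 };
struct rte_flow_item_vxlan_gpe mask = { .flags = 0x04 };

struct rte_flow_item item = {
	.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	.spec = &spec,
	.mask = &mask,
};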
Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash
> On Feb 19, 2024, at 3:28 PM, Abdullah Ömer Yamaç wrote: > > Hello, > > Let me explain a use case; > > I have a hash table whose key value is IP addresses, and data (let's say the > username of the IP) is related to the IP address. The key point is matching > these data with flows. Flows are dynamic, and this hash table is dynamic, as > well; both can change anytime. For example, when a flow starts, we look up > the hash table with the corresponding IP and retrieve the username. We need > to hold this username until the flow terminates, although we removed this IP > key from the hash table (multithread). That's why we have RCU and defer queue > is necessary for high performance. In my application, I need to know the > number of IP-username entries. These numbers can be calculated by > rte_hash_count - defer queue size. The entries in the defer queue are not reclaimed (there is a probability that all of them can be reclaimed) and hence they are not available for allocation. So, rte_hash_count - defer queue size might not give you the correct number you are expecting. Currently, there is no API in hash library that forces a reclaim. Does it makes sense to have an API that just does the reclaim (and returns the number of entries pending in the defer queue)? A call to rte_hash_count should provide the exact count you are looking for. > > I think if you need a non-blocking and multithreaded hash table, an > RCU-enabled hash table is necessary. Also, this API is necessary if you need > to get the actual matchable size. > > > > > > On Mon, Feb 19, 2024 at 8:36 PM Medvedkin, Vladimir > wrote: > Hi Abdullah, > > Could you please tell more about use cases where this API may be useful? > > >a new API to get the hidden key count in the hash table if the rcu qsbr is > >enabled > > Here in commit message and down below in doxygen comments, I think this > statement should be more specific because rcu can be created with > RTE_HASH_QSBR_MODE_SYNC mode i.e. without defer queue. > > Also, new API must be reflected in release notes > > On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote: > > This patch introduce a new API to get the hidden key count in the hash > > table if the rcu qsbr is enabled. When using rte_hash_count with rcu > > qsbr enabled, it will return the number of elements that are not in the > > free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of > > elements in the defer queue will not be counted and freed. Therefore I > > added a new API to get the number of hidden (defer queue) elements > > in the hash table. Then the user can calculate the total number of > > elements that are available in the hash table. 
> > > > Signed-off-by: Abdullah Ömer Yamaç > > > > --- > > Cc: Honnappa Nagarahalli > > Cc: Yipeng Wang > > Cc: Sameh Gobriel > > Cc: Bruce Richardson > > Cc: Vladimir Medvedkin > > --- > > lib/hash/rte_cuckoo_hash.c | 9 + > > lib/hash/rte_hash.h| 13 + > > lib/hash/version.map | 1 + > > lib/rcu/rte_rcu_qsbr.c | 8 > > lib/rcu/rte_rcu_qsbr.h | 11 +++ > > lib/rcu/version.map| 1 + > > 6 files changed, 43 insertions(+) > > > > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c > > index 70456754c4..3553f3efc7 100644 > > --- a/lib/hash/rte_cuckoo_hash.c > > +++ b/lib/hash/rte_cuckoo_hash.c > > @@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h) > > return h->entries; > > } > > > > +int32_t > > +rte_hash_dq_count(const struct rte_hash *h) > > +{ > > + if (h->dq == NULL) > input arguments must be checked since this is a public API, the same is > true for rte_rcu_qsbr_dq_count() > > + return -EINVAL; > why not just return 0? > > + > > + return rte_rcu_qsbr_dq_count(h->dq); > > +} > > + > > int32_t > > rte_hash_count(const struct rte_hash *h) > > { > > diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h > > index 7ecc02..8ea97e297d 100644 > > --- a/lib/hash/rte_hash.h > > +++ b/lib/hash/rte_hash.h > > @@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h); > > void > > rte_hash_reset(struct rte_hash *h); > > > > + > > +/** > > + * Return the number of records in the defer queue of the hash table > > + * if RCU is enabled. > > + * @param h > > + * Hash table to query from > > + * @return > > + * - -EINVAL if parameters are invalid > > + * - A value indicating how many records were inserted in the table. > did you mean how many records are kept in defer queue? > > + */ > > +int32_t > > +rte_hash_dq_count(const struct rte_hash *h); > > + > > /** > >* Return the number of keys in the hash table > >* @param h > > diff --git a/lib/hash/version.map b/lib/hash/version.map > > index 6b2afebf6b..7f7b158cf1 100644 > > --- a/lib/hash/version.map > > +++ b/lib/hash/version.map > > @@ -9,6 +9,7 @@ DPDK_24 { > > rte_hash_add_key_with_hash; > > rte_hash_add_key_with_hash
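A sketch of the API shape being floated here (hypothetical name and signature; nothing like it exists in the hash library at this point in the thread):

/* Hypothetical: force reclamation of the defer queue, reporting how
 * many entries are still pending (not yet past a quiescent state).
 */
int rte_hash_rcu_qsbr_dq_reclaim(struct rte_hash *h, unsigned int *pending);

/* Usage: reclaim first, then rte_hash_count() reflects usable entries. */
unsigned int pending;
rte_hash_rcu_qsbr_dq_reclaim(h, &pending);
int32_t usable = rte_hash_count(h);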
[PATCH v2 1/4] config/arm: add Neoverse V2 part number
Add Arm Neoverse V2 CPU part number Signed-off-by: Honnappa Nagarahalli Acked-by: Ruifeng Wang Reviewed-by: Wathsala Vithanage --- config/arm/meson.build | 10 ++ 1 file changed, 10 insertions(+) diff --git a/config/arm/meson.build b/config/arm/meson.build index 36f21d2259..18b595ead1 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -100,6 +100,16 @@ part_number_config_arm = { ['RTE_MAX_LCORE', 128], ['RTE_MAX_NUMA_NODES', 2] ] +}, +'0xd4f': { +'march_features': ['sve2'], +'compiler_options': ['-mcpu=neoverse-v2'], +'flags': [ +['RTE_MACHINE', '"neoverse-v2"'], +['RTE_ARM_FEATURE_ATOMICS', true], +['RTE_MAX_LCORE', 144], +['RTE_MAX_NUMA_NODES', 2] +] } } implementer_arm = { -- 2.34.1
[PATCH v2 2/4] config/arm: add generic V2 SoC
Add generic V2 CPU SoC. This will allow for compiling a binary that will run on any SoC that uses V2 CPU. Signed-off-by: Honnappa Nagarahalli Reviewed-by: Wathsala Vithanage --- config/arm/arm64_v2_linux_gcc | 16 config/arm/meson.build| 9 + 2 files changed, 25 insertions(+) create mode 100644 config/arm/arm64_v2_linux_gcc diff --git a/config/arm/arm64_v2_linux_gcc b/config/arm/arm64_v2_linux_gcc new file mode 100644 index 00..50d9be3da3 --- /dev/null +++ b/config/arm/arm64_v2_linux_gcc @@ -0,0 +1,16 @@ +[binaries] +c = ['ccache', 'aarch64-linux-gnu-gcc'] +cpp = ['ccache', 'aarch64-linux-gnu-g++'] +ar = 'aarch64-linux-gnu-gcc-ar' +strip = 'aarch64-linux-gnu-strip' +pkgconfig = 'aarch64-linux-gnu-pkg-config' +pcap-config = '' + +[host_machine] +system = 'linux' +cpu_family = 'aarch64' +cpu = 'armv9-a' +endian = 'little' + +[properties] +platform = 'v2' diff --git a/config/arm/meson.build b/config/arm/meson.build index 18b595ead1..f096ed9ebf 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -527,6 +527,13 @@ soc_bluefield3 = { 'numa': false } +soc_v2 = { +'description': 'Arm Neoverse V2', +'implementer': '0x41', +'part_number': '0xd4f', +'numa': true +} + ''' Start of SoCs list generic: Generic un-optimized build for armv8 aarch64 execution mode. @@ -555,6 +562,7 @@ stingray:Broadcom Stingray thunderx2: Marvell ThunderX2 T99 thunderxt88: Marvell ThunderX T88 thunderxt83: Marvell ThunderX T83 +v2: Arm Neoverse V2 End of SoCs list ''' # The string above is included in the documentation, keep it in sync with the @@ -586,6 +594,7 @@ socs = { 'thunderx2': soc_thunderx2, 'thunderxt88': soc_thunderxt88, 'thunderxt83': soc_thunderxt83, +'v2': soc_v2, } dpdk_conf.set('RTE_ARCH_ARM', 1) -- 2.34.1
[PATCH v2 3/4] config/arm: add NVIDIA Grace CPU
Add meson build configuration for NVIDIA Grace platform with 64-bit Arm Neoverse V2 cores. Signed-off-by: Honnappa Nagarahalli Acked-by: Ruifeng Wang Reviewed-by: Wathsala Vithanage --- config/arm/arm64_grace_linux_gcc | 16 config/arm/meson.build | 10 ++ 2 files changed, 26 insertions(+) create mode 100644 config/arm/arm64_grace_linux_gcc diff --git a/config/arm/arm64_grace_linux_gcc b/config/arm/arm64_grace_linux_gcc new file mode 100644 index 00..bde55b17a8 --- /dev/null +++ b/config/arm/arm64_grace_linux_gcc @@ -0,0 +1,16 @@ +[binaries] +c = ['ccache', 'aarch64-linux-gnu-gcc'] +cpp = ['ccache', 'aarch64-linux-gnu-g++'] +ar = 'aarch64-linux-gnu-gcc-ar' +strip = 'aarch64-linux-gnu-strip' +pkgconfig = 'aarch64-linux-gnu-pkg-config' +pcap-config = '' + +[host_machine] +system = 'linux' +cpu_family = 'aarch64' +cpu = 'armv9-a' +endian = 'little' + +[properties] +platform = 'grace' diff --git a/config/arm/meson.build b/config/arm/meson.build index f096ed9ebf..606d8942ca 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -424,6 +424,14 @@ soc_tys2500 = { 'numa': true } +soc_grace = { +'description': 'NVIDIA Grace', +'implementer': '0x41', +'part_number': '0xd4f', +'extra_march_features': ['crypto'], +'numa': true +} + soc_graviton2 = { 'description': 'AWS Graviton2', 'implementer': '0x41', @@ -551,6 +559,7 @@ dpaa:NXP DPAA emag:Ampere eMAG ft2000plus: Phytium FT-2000+ tys2500: Phytium TengYun S2500 +grace: NVIDIA Grace graviton2: AWS Graviton2 graviton3: AWS Graviton3 hip10: HiSilicon HIP10 @@ -583,6 +592,7 @@ socs = { 'emag': soc_emag, 'ft2000plus': soc_ft2000plus, 'tys2500': soc_tys2500, +'grace': soc_grace, 'graviton2': soc_graviton2, 'graviton3': soc_graviton3, 'hip10': soc_hip10, -- 2.34.1
[PATCH v2 4/4] config/arm: add AWS Graviton4 CPU
Add meson build configuration for AWS Graviton4 platform with 64-bit Arm Neoverse V2 cores. Signed-off-by: Honnappa Nagarahalli Reviewed-by: Wathsala Vithanage --- config/arm/arm64_graviton4_linux_gcc | 16 config/arm/meson.build | 14 ++ 2 files changed, 30 insertions(+) create mode 100644 config/arm/arm64_graviton4_linux_gcc diff --git a/config/arm/arm64_graviton4_linux_gcc b/config/arm/arm64_graviton4_linux_gcc new file mode 100644 index 00..839224bca7 --- /dev/null +++ b/config/arm/arm64_graviton4_linux_gcc @@ -0,0 +1,16 @@ +[binaries] +c = ['ccache', 'aarch64-linux-gnu-gcc'] +cpp = ['ccache', 'aarch64-linux-gnu-g++'] +ar = 'aarch64-linux-gnu-gcc-ar' +strip = 'aarch64-linux-gnu-strip' +pkgconfig = 'aarch64-linux-gnu-pkg-config' +pcap-config = '' + +[host_machine] +system = 'linux' +cpu_family = 'aarch64' +cpu = 'armv9-a' +endian = 'little' + +[properties] +platform = 'graviton4' diff --git a/config/arm/meson.build b/config/arm/meson.build index 606d8942ca..075d6d8e88 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -447,6 +447,18 @@ soc_graviton3 = { 'numa': false } +soc_graviton4 = { +'description': 'AWS Graviton4', +'implementer': '0x41', +'part_number': '0xd4f', +'extra_march_features': ['crypto'], +'flags': [ +['RTE_MAX_LCORE', 96], +['RTE_MAX_NUMA_NODES', 1] +], +'numa': false +} + soc_hip10 = { 'description': 'HiSilicon HIP10', 'implementer': '0x48', @@ -562,6 +574,7 @@ tys2500: Phytium TengYun S2500 grace: NVIDIA Grace graviton2: AWS Graviton2 graviton3: AWS Graviton3 +graviton4: AWS Graviton4 hip10: HiSilicon HIP10 kunpeng920: HiSilicon Kunpeng 920 kunpeng930: HiSilicon Kunpeng 930 @@ -595,6 +608,7 @@ socs = { 'grace': soc_grace, 'graviton2': soc_graviton2, 'graviton3': soc_graviton3, +'graviton4': soc_graviton4, 'hip10': soc_hip10, 'kunpeng920': soc_kunpeng920, 'kunpeng930': soc_kunpeng930, -- 2.34.1
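For anyone trying these configs out: each new cross file is used in the standard meson way, e.g. 'meson setup build --cross-file config/arm/arm64_grace_linux_gcc', and the 'platform' property inside the file selects the matching SoC entry added above.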
RE: [PATCH 2/3] net/ionic: remove duplicate barriers
> -Original Message- > From: Wathsala Wathawana Vithanage > Sent: Monday, February 19, 2024 4:17 PM > To: Andrew Boyer ; dev@dpdk.org > Cc: Neel Patel ; nd ; Honnappa > Nagarahalli ; nd > Subject: RE: [PATCH 2/3] net/ionic: remove duplicate barriers > > Hi Andrew, > > I think that this barrier may have been added to ensure any writes to q- > >hw_index and q->head_idx complete before ionic_q_flush computes val. > Dependency chains can also prevent reordering; if that's the case, this barrier is > not required. > However, I have the following concern. > > > diff --git a/drivers/net/ionic/ionic_main.c > > b/drivers/net/ionic/ionic_main.c index 1f24f64a33..814bb3b8f4 100644 > > --- a/drivers/net/ionic/ionic_main.c > > +++ b/drivers/net/ionic/ionic_main.c > > @@ -223,7 +223,6 @@ ionic_adminq_post(struct ionic_lif *lif, struct > > ionic_admin_ctx *ctx) > > q->head_idx = Q_NEXT_TO_POST(q, 1); > > > > /* Ring doorbell */ > > - rte_wmb(); > > ionic_q_flush(q); > > > > err_out: > > ionic_q_flush(q) uses q->hw_index and q->head_idx to compute the value of > val which it writes to the address stored in q->db. I can see that > q->head_idx is > updated before the removed rte_wmb(), therefore it's safe to assume that " q- > >head_idx = Q_NEXT_TO_POST(q, 1)" and "val = IONIC_DBELL_QID(q- > >hw_index) | q->head_idx" will maintain the program order due to that > dependency. But I don't know if there exists a dependency chain over q- > >hw_index. If not, then that may have been the motive behind this barrier. > Since q->hw_index is also updated on the same CPU, ionic_q_flush will always see the correct value; consequently, val should always be correct. It's safe to remove this barrier. > > diff --git a/drivers/net/ionic/ionic_rxtx_sg.c > > b/drivers/net/ionic/ionic_rxtx_sg.c > > index e8dec99c04..3820fd850f 100644 > > --- a/drivers/net/ionic/ionic_rxtx_sg.c > > +++ b/drivers/net/ionic/ionic_rxtx_sg.c > > @@ -218,7 +218,6 @@ ionic_xmit_pkts_sg(void *tx_queue, struct > rte_mbuf > > **tx_pkts, > > } > > > > if (nb_tx > 0) { > > - rte_wmb(); > > ionic_txq_flush(q); > > > > txq->last_wdog_cycles = rte_get_timer_cycles(); diff --git > > a/drivers/net/ionic/ionic_rxtx_simple.c > > b/drivers/net/ionic/ionic_rxtx_simple.c > > index 9674b4d1df..4c9b9415ad 100644 > > --- a/drivers/net/ionic/ionic_rxtx_simple.c > > +++ b/drivers/net/ionic/ionic_rxtx_simple.c > > @@ -191,7 +191,6 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf > > **tx_pkts, > > } > > > > if (nb_tx > 0) { > > - rte_wmb(); > > ionic_txq_flush(q); > > > > txq->last_wdog_cycles = rte_get_timer_cycles(); > > -- > > 2.17.1
Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications
> On Feb 16, 2024, at 11:07 AM, Andrew Boyer wrote: > > Add support for running DPDK applications directly on AMD Pensando > embedded HW. The platform exposes the device BARs through UIO. The > UIO code in the common/ionic library walks the sysfs filesystem > to identify the relevant BARs and map them into process memory. > > The SoCs are named 'Capri' and 'Elba'. > > The vdev device interface code is located in ionic_dev_vdev.c. > > Some datapath operations are #ifdef-ed out to save on resources when > running in embedded mode. > > Some controlpath operations are skipped by the ionic_is_embedded() > helper function. > > Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal > barrier inside rte_write64() is insufficient on these devices due to > a chip errata. > > Signed-off-by: Andrew Boyer > Signed-off-by: Neel Patel > Signed-off-by: R Mohamed Shah > Signed-off-by: Alfredo Cardigliano > --- > config/arm/arm64_capri_linux_gcc | 16 +++ > config/arm/arm64_elba_linux_gcc| 16 +++ > config/arm/meson.build | 42 > drivers/net/ionic/ionic.h | 2 +- > drivers/net/ionic/ionic_dev.h | 17 > drivers/net/ionic/ionic_dev_vdev.c | 156 + > drivers/net/ionic/ionic_ethdev.c | 7 ++ > drivers/net/ionic/ionic_lif.c | 19 > drivers/net/ionic/ionic_rxtx.h | 4 + > drivers/net/ionic/meson.build | 1 + > 10 files changed, 279 insertions(+), 1 deletion(-) > create mode 100644 config/arm/arm64_capri_linux_gcc > create mode 100644 config/arm/arm64_elba_linux_gcc > create mode 100644 drivers/net/ionic/ionic_dev_vdev.c > > diff --git a/config/arm/arm64_capri_linux_gcc > b/config/arm/arm64_capri_linux_gcc > new file mode 100644 > index 00..1a6313e684 > --- /dev/null > +++ b/config/arm/arm64_capri_linux_gcc > @@ -0,0 +1,16 @@ > +[binaries] > +c = ['ccache', 'aarch64-linux-gnu-gcc'] > +cpp = ['ccache', 'aarch64-linux-gnu-g++'] > +ar = 'aarch64-linux-gnu-gcc-ar' > +strip = 'aarch64-linux-gnu-strip' > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > +pcap-config = '' > + > +[host_machine] > +system = 'linux' > +cpu_family = 'aarch64' > +cpu = 'armv8-a' > +endian = 'little' > + > +[properties] > +platform = 'capri' > diff --git a/config/arm/arm64_elba_linux_gcc b/config/arm/arm64_elba_linux_gcc > new file mode 100644 > index 00..4d891bd5a7 > --- /dev/null > +++ b/config/arm/arm64_elba_linux_gcc > @@ -0,0 +1,16 @@ > +[binaries] > +c = ['ccache', 'aarch64-linux-gnu-gcc'] > +cpp = ['ccache', 'aarch64-linux-gnu-g++'] > +ar = 'aarch64-linux-gnu-gcc-ar' > +strip = 'aarch64-linux-gnu-strip' > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > +pcap-config = '' > + > +[host_machine] > +system = 'linux' > +cpu_family = 'aarch64' > +cpu = 'armv8-a' > +endian = 'little' > + > +[properties] > +platform = 'elba' > diff --git a/config/arm/meson.build b/config/arm/meson.build > index 36f21d2259..2326021fed 100644 > --- a/config/arm/meson.build > +++ b/config/arm/meson.build > @@ -165,6 +165,33 @@ implementer_cavium = { > } > } > > +implementer_ionic = { > +'description': 'AMD Pensando', > +'flags': [ > +['RTE_MAX_NUMA_NODES', 1], > +['RTE_CACHE_LINE_SIZE', 64], > +['RTE_LIBRTE_VHOST_NUMA', false], > +['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > +['RTE_LIBRTE_IONIC_PMD_EMBEDDED', true], > +], > +'part_number_config': { > +'0xc1': { > +'compiler_options': ['-mcpu=cortex-a72'], > +'flags': [ > +['RTE_MAX_LCORE', 4], > +['RTE_LIBRTE_IONIC_PMD_BARRIER_ERRATA', true], > +] > +}, > +'0xc2': { > +'compiler_options': ['-mcpu=cortex-a72'], > +'flags': [ > +['RTE_MAX_LCORE', 16], > +['RTE_LIBRTE_IONIC_PMD_BARRIER_ERRATA', true], > +] > +} > +} > +} 
Can you place it such that it is ordered alphabetically? (I understand that currently things are not ordered alphabetically, I have plans to fix that) > + > implementer_ampere = { > 'description': 'Ampere Computing', > 'flags': [ > @@ -294,6 +321,7 @@ implementers = { > '0x50': implementer_ampere, > '0x51': implementer_qualcomm, > '0x70': implementer_phytium, > +'0x75': implementer_ionic, > '0xc0': implementer_ampere, > } > > @@ -517,6 +545,18 @@ soc_bluefield3 = { >'numa': false > } > > +soc_ionic_capri = { > +'description': 'AMD Pensando Capri', > +'implementer': '0x75', > +'part_number': '0xc1' > +} > + > +soc_ionic_elba = { > +'description': 'AMD Pensando Elba', > +'implementer': '0x75', > +'part_number': '0xc2' > +} > + > ''' > Start of SoCs list > generic: Generic un-optimized build for armv8 aarch64 execution mode. > @@ -576,6 +616,8 @@ socs = { > 'thunderx2': soc_thunderx2, > 'thunderxt88': soc_thunderxt88, > 'thunderxt83': soc_thunderxt83, > +'capri': soc_ionic_
[PATCH] examples/dma: fix max-frame-size cannot be zero
In the original implementation, max_frame_size could be zero, but commit ("examples/dma: replace getopt with argparse") treats zero as an error. This commit fixes that. Also, since an unsigned value can never be negative, adjust the "<= 0" checks to "== 0". Fixes: 8d85afb19af7 ("examples/dma: replace getopt with argparse") Reported-by: Jiang, YuX Signed-off-by: Chengwen Feng --- examples/dma/dmafwd.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/dma/dmafwd.c b/examples/dma/dmafwd.c index f4a0bff06e..acceae6b7b 100644 --- a/examples/dma/dmafwd.c +++ b/examples/dma/dmafwd.c @@ -695,23 +695,23 @@ dma_parse_args(int argc, char **argv, unsigned int nb_ports) return ret; /* check argument's value which parsing by autosave. */ - if (dma_batch_sz <= 0 || dma_batch_sz > MAX_PKT_BURST) { + if (dma_batch_sz == 0 || dma_batch_sz > MAX_PKT_BURST) { printf("Invalid dma batch size, %d.\n", dma_batch_sz); return -1; } - if (max_frame_size <= 0 || max_frame_size > RTE_ETHER_MAX_JUMBO_FRAME_LEN) { + if (max_frame_size > RTE_ETHER_MAX_JUMBO_FRAME_LEN) { printf("Invalid max frame size, %d.\n", max_frame_size); return -1; } - if (nb_queues <= 0 || nb_queues > MAX_RX_QUEUES_COUNT) { + if (nb_queues == 0 || nb_queues > MAX_RX_QUEUES_COUNT) { printf("Invalid RX queues number %d. Max %u\n", nb_queues, MAX_RX_QUEUES_COUNT); return -1; } - if (ring_size <= 0) { + if (ring_size == 0) { printf("Invalid ring size, %d.\n", ring_size); return -1; } @@ -721,7 +721,7 @@ dma_parse_args(int argc, char **argv, unsigned int nb_ports) ring_size = MBUF_RING_SIZE; } - if (stats_interval <= 0) { + if (stats_interval == 0) { printf("Invalid stats interval, setting to 1\n"); stats_interval = 1; /* set to default */ } -- 2.17.1
[DPDK/ethdev Bug 1381] TAP device can not support 17 queues
https://bugs.dpdk.org/show_bug.cgi?id=1381 Bug ID: 1381 Summary: TAP device can not support 17 queues Product: DPDK Version: 23.11 Hardware: All OS: All Status: UNCONFIRMED Severity: normal Priority: Normal Component: ethdev Assignee: dev@dpdk.org Reporter: step...@networkplumber.org Target Milestone: --- If you try: # dpdk-testpmd --log-level=pmd.net.tap:debug -l 1-2 --vdev=net_tap0 -- -i --rxq=8 --txq=8 It will fail because: EAL: Detected CPU lcores: 8 EAL: Detected NUMA nodes: 1 EAL: Detected static linkage of DPDK EAL: Multi-process socket /var/run/dpdk/rte/mp_socket EAL: Selected IOVA mode 'VA' rte_pmd_tap_probe(): Initializing pmd_tap for net_tap0 eth_dev_tap_create(): TAP device on numa 0 tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tun_alloc(): Using rt-signal 35 eth_dev_tap_create(): allocated dtap0 Interactive-mode selected testpmd: create a new mbuf pool : n=155456, size=2176, socket=0 testpmd: preferred mempool ops selected: ring_mp_mc Warning! port-topology=paired and odd forward ports number, the last port will pair with itself. Configuring Port 0 (socket 0) tap_dev_configure(): net_tap0: dtap0: TX configured queues number: 8 tap_dev_configure(): net_tap0: dtap0: RX configured queues number: 8 tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 0 fd 26 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 0 on fd 26 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 1 fd 212 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 1 on fd 212 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 2 fd 213 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 2 on fd 213 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 3 fd 214 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 3 on fd 214 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 4 fd 215 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 4 on fd 215 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 5 fd 216 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 5 on fd 216 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 6 fd 217 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 6 on fd 217 csum off tun_alloc(): /dev/net/tun Features 7173 tun_alloc(): Multi-queue support for 16 queues tun_alloc(): Device name is 'dtap0' tap_setup_queue(): dtap0: add tx queue for qid 7 fd 218 tap_tx_queue_setup(): TX TUNTAP device name dtap0, qid 7 on fd 218 csum off tap_setup_queue(): dtap0: dup fd 26 for rx queue qid 0 (219) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 0 on fd 219 tap_setup_queue(): dtap0: dup fd 212 for rx queue qid 1 (220) tap_rx_queue_setup(): 
RX TUNTAP device name dtap0, qid 1 on fd 220 tap_setup_queue(): dtap0: dup fd 213 for rx queue qid 2 (221) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 2 on fd 221 tap_setup_queue(): dtap0: dup fd 214 for rx queue qid 3 (222) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 3 on fd 222 tap_setup_queue(): dtap0: dup fd 215 for rx queue qid 4 (223) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 4 on fd 223 tap_setup_queue(): dtap0: dup fd 216 for rx queue qid 5 (224) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 5 on fd 224 tap_setup_queue(): dtap0: dup fd 217 for rx queue qid 6 (225) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 6 on fd 225 tap_setup_queue(): dtap0: dup fd 218 for rx queue qid 7 (226) tap_rx_queue_setup(): RX TUNTAP device name dtap0, qid 7 on fd 226 EAL: Cannot send more than 8 FDs tap_mp_req_on_rxtx(): Failed to send start req to secondary 7 This is a regression caused by: commit c36ce7099c2187926cd62cff7ebd479823554929 Author: Kumara Parameshwaran Date: Mon Jan 31 20:02:34 2022 +0530 net/tap: fix to populate FDs in secondary process When a tap device is hotplugged to primary process which in turn adds the device to all sec
Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.
I agree. This bug had manifested for a while before I partially fixed it in "[PATCH] net/gve: fix dqo bug for chained descriptors". However, for higher queue counts (> 13), we still see this behavior. I'll add a statistic. On Mon, Feb 19, 2024, 10:56 PM Stephen Hemminger wrote: > On Mon, 19 Feb 2024 02:44:35 + > Rushil Gupta wrote: > > > This was causing failure for testpmd runs (for queues >=15) > > presumably due to flooding of logs due to descriptor ring being > > overwritten. > > > > Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors") > > Cc: sta...@dpdk.org > > > > Signed-off-by: Rushil Gupta > > Reviewed-by: Joshua Washington > > Isn't this still an error? What about the overwritten descriptor: is there > an mbuf leak? > Maybe a statistic would be better than a message? >
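A sketch of the statistic idea (the condition and the counter field are hypothetical, not existing gve code; it assumes the driver's usual PMD_DRV_LOG macro):

/* On detecting that the descriptor ring was overwritten: count it and
 * keep the log at DEBUG so high queue counts don't flood the log.
 */
if (unlikely(ring_overwritten)) {	/* hypothetical condition */
	txq->stats.desc_overwritten++;	/* hypothetical per-queue counter */
	PMD_DRV_LOG(DEBUG, "TX descriptor ring overwritten on queue %u",
		    txq->queue_id);
}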
Re: [PATCH] net/hns3: fix Rx packet truncation when KEEP CRC enabled
Hi, Ferruh, Thanks for your review. On 2024/2/7 22:15, Ferruh Yigit wrote: On 2/6/2024 1:10 AM, Jie Hai wrote: From: Dengdui Huang When KEEP_CRC offload is enabled, some packets will be truncated and the CRC is still stripped in the following cases: 1. For HIP08 hardware, the packet type is TCP and the length is less than or equal to 60B. 2. For other hardware, the packet type is IP and the length is less than or equal to 60B. If a device doesn't support the offload for some packets, it can be an option to disable the offload for that device, instead of calculating it in software and appending it. The KEEP CRC feature of hns3 is faulty only in the specific packet type and small packet(<60B) case. What's more, the small ethernet packet is not common. Unless you have a specific usecase, or requirement to support the offload. Yes, some users of hns3 are already using this feature. So we cannot drop this offload. <...> @@ -2492,10 +2544,16 @@ hns3_recv_pkts_simple(void *rx_queue, goto pkt_err; rxm->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); - if (rxm->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC) rxm->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP; + if (unlikely(rxq->crc_len > 0)) { + if (hns3_need_recalculate_crc(rxq, rxm)) + hns3_recalculate_crc(rxq, rxm); + rxm->pkt_len -= rxq->crc_len; + rxm->data_len -= rxq->crc_len; Removing 'crc_len' from 'mbuf->pkt_len' & 'mbuf->data_len' is practically the same as stripping CRC. We don't count CRC length in the statistics, but it should be accessible in the payload by the user. Our drivers are behaving exactly as you say.
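To make the expected KEEP_CRC semantics concrete: with the CRC kept, an application should be able to read the FCS from the tail of the mbuf data, which only works if data_len/pkt_len include the 4 FCS bytes, exactly what the subtraction above breaks (single-segment sketch):

/* Read the kept Ethernet FCS from the end of the received data. */
uint32_t fcs;

memcpy(&fcs,
       rte_pktmbuf_mtod_offset(m, void *, m->data_len - RTE_ETHER_CRC_LEN),
       RTE_ETHER_CRC_LEN);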
[PATCH 0/4] cfgfile: enhance error detecting
When I was trying to debug a problem introduced by config.ini in test-dma-perf, I found that the cfgfile library's error detection should be enhanced, so I put together this patchset. Chengwen Feng (4): cfgfile: remove dead code cfgfile: support verify name and value cfgfile: verify add section and entry result cfgfile: add unique name flag lib/cfgfile/rte_cfgfile.c | 70 +-- lib/cfgfile/rte_cfgfile.h | 7 2 files changed, 59 insertions(+), 18 deletions(-) -- 2.17.1
[PATCH 3/4] cfgfile: verify add section and entry result
The results of rte_cfgfile_add_section() and _add_entry() were not verified, so potential errors could go undetected. This commit adds the verification. Cc: sta...@dpdk.org Signed-off-by: Chengwen Feng --- lib/cfgfile/rte_cfgfile.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c index 6f8f46a406..ad9314dc14 100644 --- a/lib/cfgfile/rte_cfgfile.c +++ b/lib/cfgfile/rte_cfgfile.c @@ -182,6 +182,7 @@ rte_cfgfile_load_with_params(const char *filename, int flags, char buffer[CFG_NAME_LEN + CFG_VALUE_LEN + 4]; int lineno = 0; struct rte_cfgfile *cfg; + int ret; if (rte_cfgfile_check_params(params)) return NULL; @@ -226,7 +227,9 @@ rte_cfgfile_load_with_params(const char *filename, int flags, *end = '\0'; _strip(&buffer[1], end - &buffer[1]); - rte_cfgfile_add_section(cfg, &buffer[1]); + ret = rte_cfgfile_add_section(cfg, &buffer[1]); + if (ret != 0) + goto error1; } else { /* key and value line */ char *split[2] = {NULL}; @@ -261,8 +264,10 @@ rte_cfgfile_load_with_params(const char *filename, int flags, if (cfg->num_sections == 0) goto error1; - _add_entry(&cfg->sections[cfg->num_sections - 1], - split[0], split[1]); + ret = _add_entry(&cfg->sections[cfg->num_sections - 1], +split[0], split[1]); + if (ret != 0) + goto error1; } } fclose(f); @@ -277,6 +282,7 @@ struct rte_cfgfile * rte_cfgfile_create(int flags) { int i; + int ret; struct rte_cfgfile *cfg; /* future proof flags usage */ @@ -310,8 +316,11 @@ rte_cfgfile_create(int flags) cfg->sections[i].allocated_entries = CFG_ALLOC_ENTRY_BATCH; } - if (flags & CFG_FLAG_GLOBAL_SECTION) - rte_cfgfile_add_section(cfg, "GLOBAL"); + if (flags & CFG_FLAG_GLOBAL_SECTION) { + ret = rte_cfgfile_add_section(cfg, "GLOBAL"); + if (ret != 0) + goto error1; + } return cfg; error1: -- 2.17.1
[PATCH 1/4] cfgfile: remove dead code
This memchr() can never return NULL, because the rte_cfgfile_load() function skips lines without useful content.

Fixes: 74e0d3a17461 ("cfgfile: fix null pointer dereference in parsing")
Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng
---
 lib/cfgfile/rte_cfgfile.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index 6a5e4fd942..d7aa38b56f 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -223,12 +223,6 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
 			split[0] = buffer;
 			split[1] = memchr(buffer, '=', len);
-			if (split[1] == NULL) {
-				CFG_LOG(ERR,
-					"line %d - no '=' character found",
-					lineno);
-				goto error1;
-			}
 			*split[1] = '\0';
 			split[1]++;
--
2.17.1
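For context, a minimal sketch of the invariant that makes the removed check dead. This paraphrases the load loop rather than quoting it: it assumes, as the commit message states, that lines which are neither a section header nor contain '=' are skipped before the key/value branch runs.

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* By the time the key/value branch executes, the line is known to
 * contain '=', so memchr(buffer, '=', len) there cannot be NULL. */
static bool
line_reaches_kv_branch(const char *buffer, size_t len)
{
	if (buffer[0] == '[')
		return false;                     /* section-header branch */
	return memchr(buffer, '=', len) != NULL;  /* skipped otherwise */
}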
[PATCH 2/4] cfgfile: support verify name and value
This patch adds validity checks for section names, entry names, and entry values.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng
---
 lib/cfgfile/rte_cfgfile.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index d7aa38b56f..6f8f46a406 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -105,6 +105,16 @@ static int
 _add_entry(struct rte_cfgfile_section *section, const char *entryname,
 	const char *entryvalue)
 {
+	int name_len, value_len;
+
+	name_len = strlen(entryname);
+	value_len = strlen(entryvalue);
+	if (name_len == 0 || name_len >= CFG_NAME_LEN || value_len >= CFG_VALUE_LEN) {
+		CFG_LOG(ERR, "invalid entry name %s or value %s in section %s",
+			entryname, entryvalue, section->name);
+		return -EINVAL;
+	}
+
 	/* resize entry structure if we don't have room for more entries */
 	if (section->num_entries == section->allocated_entries) {
 		struct rte_cfgfile_entry *n_entries = realloc(
@@ -322,6 +332,7 @@ rte_cfgfile_create(int flags)
 int
 rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
 {
+	int len;
 	int i;

 	if (cfg == NULL)
@@ -330,6 +341,12 @@ rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
 	if (sectionname == NULL)
 		return -EINVAL;

+	len = strlen(sectionname);
+	if (len == 0 || len >= CFG_NAME_LEN) {
+		CFG_LOG(ERR, "invalid section name %s", sectionname);
+		return -EINVAL;
+	}
+
 	/* resize overall struct if we don't have room for more sections */
 	if (cfg->num_sections == cfg->allocated_sections) {
--
2.17.1
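As a caller-visible illustration, a hypothetical snippet (not part of the patch) showing what the new checks reject. CFG_NAME_LEN comes from rte_cfgfile.h; the demo function itself is made up.

#include <string.h>
#include <rte_cfgfile.h>

/* Empty or overlong names are now rejected with -EINVAL instead of
 * being silently accepted. */
static void
demo_name_checks(struct rte_cfgfile *cfg)
{
	char too_long[CFG_NAME_LEN + 8];
	int ret;

	memset(too_long, 'a', sizeof(too_long) - 1);
	too_long[sizeof(too_long) - 1] = '\0';

	ret = rte_cfgfile_add_section(cfg, "");       /* ret == -EINVAL */
	ret = rte_cfgfile_add_section(cfg, too_long); /* ret == -EINVAL */
	(void)ret;
}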
[PATCH 4/4] cfgfile: add unique name flag
The cfgfile library accepts duplicate section names and entry names when parsing a config file, which can be confusing and hard to debug when a duplicate name is set accidentally. So add a unique-name flag: with it set, a duplicate section name or entry name is treated as an error.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng
---
 lib/cfgfile/rte_cfgfile.c | 32 +++-
 lib/cfgfile/rte_cfgfile.h |  7 +++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index ad9314dc14..9e901b0977 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -102,8 +102,8 @@ _get_section(struct rte_cfgfile *cfg, const char *sectionname)
 }

 static int
-_add_entry(struct rte_cfgfile_section *section, const char *entryname,
-	const char *entryvalue)
+_add_entry(struct rte_cfgfile *cfg, struct rte_cfgfile_section *section,
+	const char *entryname, const char *entryvalue, bool check_dup)
 {
 	int name_len, value_len;

@@ -115,6 +115,14 @@ _add_entry(struct rte_cfgfile_section *section, const char *entryname,
 		return -EINVAL;
 	}

+	if (check_dup) {
+		if (rte_cfgfile_has_entry(cfg, section->name, entryname) != 0) {
+			CFG_LOG(ERR, "duplicate entry name %s in section %s",
+				entryname, section->name);
+			return -EEXIST;
+		}
+	}
+
 	/* resize entry structure if we don't have room for more entries */
 	if (section->num_entries == section->allocated_entries) {
 		struct rte_cfgfile_entry *n_entries = realloc(
@@ -264,8 +272,9 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
 			if (cfg->num_sections == 0)
 				goto error1;

-			ret = _add_entry(&cfg->sections[cfg->num_sections - 1],
-					 split[0], split[1]);
+			ret = _add_entry(cfg, &cfg->sections[cfg->num_sections - 1],
+					 split[0], split[1],
+					 !!(flags & CFG_FLAG_UNIQUE_NAME));
 			if (ret != 0)
 				goto error1;
 		}
@@ -286,7 +295,8 @@ rte_cfgfile_create(int flags)
 	struct rte_cfgfile *cfg;

 	/* future proof flags usage */
-	if (flags & ~(CFG_FLAG_GLOBAL_SECTION | CFG_FLAG_EMPTY_VALUES))
+	if (flags & ~(CFG_FLAG_GLOBAL_SECTION | CFG_FLAG_EMPTY_VALUES |
+		      CFG_FLAG_UNIQUE_NAME))
 		return NULL;

 	cfg = malloc(sizeof(*cfg));
@@ -356,6 +366,13 @@ rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
 		return -EINVAL;
 	}

+	if (cfg->flags & CFG_FLAG_UNIQUE_NAME) {
+		if (rte_cfgfile_has_section(cfg, sectionname) != 0) {
+			CFG_LOG(ERR, "duplicate section name %s", sectionname);
+			return -EEXIST;
+		}
+	}
+
 	/* resize overall struct if we don't have room for more sections */
 	if (cfg->num_sections == cfg->allocated_sections) {

@@ -396,16 +413,13 @@ int rte_cfgfile_add_entry(struct rte_cfgfile *cfg,
 	    || (entryvalue == NULL))
 		return -EINVAL;

-	if (rte_cfgfile_has_entry(cfg, sectionname, entryname) != 0)
-		return -EEXIST;
-
 	/* search for section pointer by sectionname */
 	struct rte_cfgfile_section *curr_section =
 		_get_section(cfg, sectionname);
 	if (curr_section == NULL)
 		return -EINVAL;

-	ret = _add_entry(curr_section, entryname, entryvalue);
+	ret = _add_entry(cfg, curr_section, entryname, entryvalue, true);

 	return ret;
 }
diff --git a/lib/cfgfile/rte_cfgfile.h b/lib/cfgfile/rte_cfgfile.h
index 232c65c77b..74057c1b73 100644
--- a/lib/cfgfile/rte_cfgfile.h
+++ b/lib/cfgfile/rte_cfgfile.h
@@ -56,6 +56,13 @@ enum {
 	 * be zero length (e.g., "key=").
 	 */
 	CFG_FLAG_EMPTY_VALUES = 2,
+
+	/**
+	 * Indicates that the file must have unique section names AND
+	 * unique entry names. If a duplicate name is detected, the
+	 * operation returns an error.
+	 */
+	CFG_FLAG_UNIQUE_NAME = 4,
 };
 /**@} */
--
2.17.1
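A hypothetical caller-side sketch of the intended behavior after this patch (the section name is made up for illustration):

#include <rte_cfgfile.h>

/* With CFG_FLAG_UNIQUE_NAME, the second add of the same section name
 * fails with -EEXIST instead of silently creating a duplicate. */
static int
demo_unique_name(void)
{
	struct rte_cfgfile *cfg = rte_cfgfile_create(CFG_FLAG_UNIQUE_NAME);
	int ret;

	if (cfg == NULL)
		return -1;

	ret = rte_cfgfile_add_section(cfg, "lcore0"); /* 0: added */
	ret = rte_cfgfile_add_section(cfg, "lcore0"); /* -EEXIST now */

	rte_cfgfile_close(cfg);
	return ret;
}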
RE: [PATCH 1/1] net/mana: add vlan tagging support
> -----Original Message-----
> From: Ferruh Yigit
> Sent: Saturday, February 10, 2024 12:06 AM
> To: Wei Hu ; andrew.rybche...@oktetlabs.ru; Thomas Monjalon ; Long Li
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 1/1] net/mana: add vlan tagging support
>
> On 2/9/2024 8:52 AM, Wei Hu wrote:
> > For tx path, use LONG_PACKET_FORMAT if vlan tag is present. For rx,
> > extract vlan id from oob, put into mbuf and set the vlan flags in
> > mbuf.
> >
> > Also add myself to the maintainers list for vmbus, mana and netvsc.
> >
> > Signed-off-by: Wei Hu
> > ---
> >  MAINTAINERS             | 3 +++
> >  drivers/net/mana/mana.h | 2 ++
> >  drivers/net/mana/rx.c   | 6 ++
> >  drivers/net/mana/tx.c   | 30 +++---
> >  4 files changed, 38 insertions(+), 3 deletions(-)
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 5fb3a73f84..9983d013a6 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -608,6 +608,7 @@ F: app/test/test_vdev.c
> >
> >  VMBUS bus driver
> >  M: Long Li
> > +M: Wei Hu
> >  F: drivers/bus/vmbus/
> >
> > @@ -867,6 +868,7 @@ F: doc/guides/nics/features/mlx5.ini
> >
> >  Microsoft mana
> >  M: Long Li
> > +M: Wei Hu
> >  F: drivers/net/mana/
> >  F: doc/guides/nics/mana.rst
> >  F: doc/guides/nics/features/mana.ini
> > @@ -878,6 +880,7 @@ F: doc/guides/nics/vdev_netvsc.rst
> >
> >  Microsoft Hyper-V netvsc
> >  M: Long Li
> > +M: Wei Hu
> >  F: drivers/net/netvsc/
> >  F: doc/guides/nics/netvsc.rst
> >  F: doc/guides/nics/features/netvsc.ini
>
> Hi Wei,
>
> Can you please separate the maintainer file update into its own patch?

Sure. I will remove the maintainer file update and send it out later separately.

Thanks,
Wei
RE: [PATCH 1/1] net/mana: add vlan tagging support
> -----Original Message-----
> From: Long Li
> Sent: Saturday, February 10, 2024 2:48 AM
> To: Wei Hu ; ferruh.yi...@amd.com; andrew.rybche...@oktetlabs.ru; Thomas Monjalon ; Alan Elder
> Cc: dev@dpdk.org
> Subject: RE: [PATCH 1/1] net/mana: add vlan tagging support
>
> > +	if (oob->rx_vlan_tag_present) {
> > +		mbuf->ol_flags |=
> > +			RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
> > +		mbuf->vlan_tci = oob->rx_vlan_id;
> > +	}
> > +
>
> Netvsc has the following code for dealing with vlan on RX mbufs (in hn_rxtx.c):
> 	/* NDIS always strips tag, put it back if necessary */
> 	if (!hv->vlan_strip && rte_vlan_insert(&m)) {
>
> It seems we should do the same?

Not sure if we want to do the same, for two reasons.

1. Searching the netvsc source, I don't see a place that sets hv->vlan_strip to false. It means !hv->vlan_strip is always false, and rte_vlan_insert(&m) never runs.

2. Usually vlan_strip can be set to true or false if the hardware supports this feature. In the mana case, the hardware strips off the vlan tag anyway; there is no way to tell the mana hardware to keep the tag. Adding the tag back in software not only slows things down, it also complicates the code and testing. Not sure if there is any real application that needs it.

I am open to adding it, but in my opinion we don't need it. Let me know what you think.

> > 		pkts[pkt_received++] = mbuf;
> > 		rxq->stats.packets++;
> > 		rxq->stats.bytes += mbuf->data_len;
> > diff --git a/drivers/net/mana/tx.c b/drivers/net/mana/tx.c
> > index 58c4a1d976..f075fcb0f5 100644
> > --- a/drivers/net/mana/tx.c
> > +++ b/drivers/net/mana/tx.c
> > @@ -180,6 +180,15 @@ get_vsq_frame_num(uint32_t vsq)
> > 	return v.vsq_frame;
> > }
> >
> > +#define VLAN_PRIO_MASK	0xe000 /* Priority Code Point */
> > +#define VLAN_PRIO_SHIFT	13
> > +#define VLAN_CFI_MASK	0x1000 /* Canonical Format Indicator / Drop Eligible Indicator */
> > +#define VLAN_VID_MASK	0x0fff /* VLAN Identifier */
> > +
> > +#define mana_mbuf_vlan_tag_get_id(m)	((m)->vlan_tci & VLAN_VID_MASK)
> > +#define mana_mbuf_vlan_tag_get_cfi(m)	(!!((m)->vlan_tci & VLAN_CFI_MASK))
> > +#define mana_mbuf_vlan_tag_get_prio(m)	(((m)->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
> > +
>
> Those definitions look like those in @Alan Elder's patch for netvsc. Can we consolidate some of those definitions into a common place?
>
> Maybe in "lib/net/rte_ether.h"?

Ok. Will add it to rte_ether.h.

Thanks,
Wei

> Thanks,
>
> Long
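For readers following the thread, a minimal self-contained sketch of the two RX policies being compared: reporting the stripped tag out-of-band in the mbuf (the mana approach above) versus reinserting it into the packet data (the netvsc option). The function names here are assumptions, not driver code.

#include <rte_ether.h>
#include <rte_mbuf.h>

/* mana-style policy: hardware always strips the tag, so report it
 * out-of-band via vlan_tci and the STRIPPED flag. */
static void
rx_report_vlan_oob(struct rte_mbuf *m, uint16_t vlan_id)
{
	m->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
	m->vlan_tci = vlan_id;
}

/* netvsc-style alternative: reinsert the tag taken from m->vlan_tci
 * into the frame; rte_vlan_insert() may have to prepend data and can
 * fail, which is the software cost Wei points out. */
static int
rx_reinsert_vlan(struct rte_mbuf **m)
{
	return rte_vlan_insert(m);
}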
RE: [PATCH] net/nfp: add support of UDP fragmentation offload
> On 2/17/2024 1:54 AM, Chaoyong He wrote:
> > Add the support of UDP fragmentation offload feature.
> >
> > Signed-off-by: Chaoyong He
> > Reviewed-by: Long Wu
> > Reviewed-by: Peng Zhang
>
> <...>
>
> > diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
> > index 72c9a41b00..99c319eb2d 100644
> > --- a/drivers/net/nfp/nfp_net_common.c
> > +++ b/drivers/net/nfp/nfp_net_common.c
> > @@ -312,7 +312,7 @@ nfp_net_log_device_information(const struct nfp_net_hw *hw)
> > 		hw->ver.major, hw->ver.minor, hw->max_mtu);
> >
> > 	PMD_INIT_LOG(INFO, "CAP: %#x", cap);
> > -	PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
> > +	PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
>
> This seems to be getting out of hand. I assume it is done like this (instead of multiple print lines) to prevent line breaks; if so, what do you think about adding a new macro that doesn't append \n automatically, and refactoring this code (in a separate patch)?

Yeah, that's a good point, thanks! I will add it to my to-do list and send a patch at a suitable time.
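As a rough illustration of the suggestion, one possible shape of such a macro. All names here are assumptions for the sketch: 'nfp_logtype_init' stands in for the driver's real init logtype and NFP_NET_CFG_CTRL_PROMISC for one of the CAP bits; this is not an existing nfp macro.

#include <stdint.h>
#include <rte_log.h>

extern int nfp_logtype_init;               /* assumed logtype name */
#define NFP_NET_CFG_CTRL_PROMISC (1 << 0)  /* assumed CAP bit */

/* Like PMD_INIT_LOG but without the automatic trailing '\n', so long
 * capability dumps can be emitted as many short calls on one line. */
#define PMD_INIT_LOG_NONL(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, nfp_logtype_init, fmt, ## __VA_ARGS__)

static void
log_caps(uint32_t cap)
{
	PMD_INIT_LOG_NONL(INFO, "CAP: %#x ", cap);
	if (cap & NFP_NET_CFG_CTRL_PROMISC)
		PMD_INIT_LOG_NONL(INFO, "PROMISC ");
	/* ... one short call per feature flag instead of one
	 * 29-argument format string ... */
	PMD_INIT_LOG_NONL(INFO, "\n");
}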