Re: [RESEND v7 1/3] ring: fix unmatched type definition and usage

2024-02-19 Thread Jie Hai

On 2024/2/19 2:11, Thomas Monjalon wrote:

09/11/2023 11:20, Jie Hai:

Field 'flags' of struct rte_ring is defined as int type. However,
it is used as unsigned int. To ensure consistency, change the
type of 'flags' to unsigned int. Since these two types have the
same size, this change is not an ABI change.

Fixes: af75078fece3 ("first public release")

Signed-off-by: Jie Hai 
Acked-by: Konstantin Ananyev 
Acked-by: Chengwen Feng 
Acked-by: Morten Brørup 
---
  lib/ring/rte_ring_core.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index b7708730658a..14dac6495d83 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -119,7 +119,7 @@ struct rte_ring_hts_headtail {
  struct rte_ring {
char name[RTE_RING_NAMESIZE] __rte_cache_aligned;
/**< Name of the ring. */
-   int flags;   /**< Flags supplied at creation. */
+   uint32_t flags;   /**< Flags supplied at creation. */


This triggers a warning in our ABI checker:

   in pointed to type 'struct rte_ring' at rte_ring_core.h:119:1:
 type size hasn't changed
 1 data member change:
   type of 'int flags' changed:
 entity changed from 'int' to compatible type 'typedef uint32_t' at stdint-uintn.h:26:1
   type name changed from 'int' to 'unsigned int'
   type size hasn't changed

I guess we were supposed to merge this in 23.11, sorry about this.

How can we proceed?


How about we drop this amendment (patch 1/3) for now?




[PATCH v8 2/2] ring: add telemetry cmd for ring info

2024-02-19 Thread Jie Hai
This patch adds support for dumping ring information by ring name.
An example using this command is shown below:

--> /ring/info,MP_mb_pool_0
{
  "/ring/info": {
"name": "MP_mb_pool_0",
"socket": 0,
"flags": 0,
"producer_type": "MP",
"consumer_type": "MC",
"size": 262144,
"mask": "0x3",
"capacity": 262143,
"used_count": 153197,
"mz_name": "RG_MP_mb_pool_0",
"mz_len": 2097536,
"mz_hugepage_sz": 1073741824,
"mz_socket_id": 0,
"mz_flags": "0x0"
  }
}

Signed-off-by: Jie Hai 
Reviewed-by: Honnappa Nagarahalli 
Acked-by: Konstantin Ananyev 
Acked-by: Huisong Li 
Acked-by: Chengwen Feng 
Acked-by: Morten Brørup 
---
 lib/ring/rte_ring.c | 95 +
 1 file changed, 95 insertions(+)

diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c
index 75f53c723186..e6c746cce1f1 100644
--- a/lib/ring/rte_ring.c
+++ b/lib/ring/rte_ring.c
@@ -458,8 +458,103 @@ ring_handle_list(const char *cmd __rte_unused,
return 0;
 }
 
+static const char *
+ring_prod_sync_type_to_name(struct rte_ring *r)
+{
+   switch (r->prod.sync_type) {
+   case RTE_RING_SYNC_MT:
+   return "MP";
+   case RTE_RING_SYNC_ST:
+   return "SP";
+   case RTE_RING_SYNC_MT_RTS:
+   return "MP_RTS";
+   case RTE_RING_SYNC_MT_HTS:
+   return "MP_HTS";
+   default:
+   return "Unknown";
+   }
+}
+
+static const char *
+ring_cons_sync_type_to_name(struct rte_ring *r)
+{
+   switch (r->cons.sync_type) {
+   case RTE_RING_SYNC_MT:
+   return "MC";
+   case RTE_RING_SYNC_ST:
+   return "SC";
+   case RTE_RING_SYNC_MT_RTS:
+   return "MC_RTS";
+   case RTE_RING_SYNC_MT_HTS:
+   return "MC_HTS";
+   default:
+   return "Unknown";
+   }
+}
+
+struct ring_info_cb_arg {
+   char *ring_name;
+   struct rte_tel_data *d;
+};
+
+static void
+ring_info_cb(struct rte_ring *r, void *arg)
+{
+   struct ring_info_cb_arg *ring_arg = (struct ring_info_cb_arg *)arg;
+   struct rte_tel_data *d = ring_arg->d;
+   const struct rte_memzone *mz;
+
+   if (strncmp(r->name, ring_arg->ring_name, RTE_RING_NAMESIZE))
+   return;
+
+   rte_tel_data_add_dict_string(d, "name", r->name);
+   rte_tel_data_add_dict_int(d, "socket", r->memzone->socket_id);
+   rte_tel_data_add_dict_int(d, "flags", r->flags);
+   rte_tel_data_add_dict_string(d, "producer_type",
+   ring_prod_sync_type_to_name(r));
+   rte_tel_data_add_dict_string(d, "consumer_type",
+   ring_cons_sync_type_to_name(r));
+   rte_tel_data_add_dict_uint(d, "size", r->size);
+   rte_tel_data_add_dict_uint_hex(d, "mask", r->mask, 0);
+   rte_tel_data_add_dict_uint(d, "capacity", r->capacity);
+   rte_tel_data_add_dict_uint(d, "used_count", rte_ring_count(r));
+
+   mz = r->memzone;
+   if (mz == NULL)
+   return;
+   rte_tel_data_add_dict_string(d, "mz_name", mz->name);
+   rte_tel_data_add_dict_uint(d, "mz_len", mz->len);
+   rte_tel_data_add_dict_uint(d, "mz_hugepage_sz", mz->hugepage_sz);
+   rte_tel_data_add_dict_int(d, "mz_socket_id", mz->socket_id);
+   rte_tel_data_add_dict_uint_hex(d, "mz_flags", mz->flags, 0);
+}
+
+static int
+ring_handle_info(const char *cmd __rte_unused, const char *params,
+   struct rte_tel_data *d)
+{
+   char name[RTE_RING_NAMESIZE] = {0};
+   struct ring_info_cb_arg ring_arg;
+
+   if (params == NULL || strlen(params) == 0 ||
+   strlen(params) >= RTE_RING_NAMESIZE)
+   return -EINVAL;
+
+   rte_strlcpy(name, params, RTE_RING_NAMESIZE);
+
+   ring_arg.ring_name = name;
+   ring_arg.d = d;
+
+   rte_tel_data_start_dict(d);
+   ring_walk(ring_info_cb, &ring_arg);
+
+   return 0;
+}
+
 RTE_INIT(ring_init_telemetry)
 {
rte_telemetry_register_cmd("/ring/list", ring_handle_list,
"Returns list of available rings. Takes no parameters");
+   rte_telemetry_register_cmd("/ring/info", ring_handle_info,
+   "Returns ring info. Parameters: ring_name.");
 }
-- 
2.30.0



[PATCH v8 1/2] ring: add telemetry cmd to list rings

2024-02-19 Thread Jie Hai
Add a telemetry command to list the rings used in the system.
An example using this command is shown below:

--> /ring/list
{
  "/ring/list": [
"HT_:7d:00.2",
"MP_mb_pool_0"
  ]
}

Signed-off-by: Jie Hai 
Acked-by: Konstantin Ananyev 
Reviewed-by: Honnappa Nagarahalli 
Acked-by: Huisong Li 
Acked-by: Chengwen Feng 
Acked-by: Morten Brørup 
---
 lib/ring/meson.build |  1 +
 lib/ring/rte_ring.c  | 40 
 2 files changed, 41 insertions(+)

diff --git a/lib/ring/meson.build b/lib/ring/meson.build
index c20685c689ac..7fca958ed7fa 100644
--- a/lib/ring/meson.build
+++ b/lib/ring/meson.build
@@ -18,3 +18,4 @@ indirect_headers += files (
 'rte_ring_rts.h',
 'rte_ring_rts_elem_pvt.h',
 )
+deps += ['telemetry']
diff --git a/lib/ring/rte_ring.c b/lib/ring/rte_ring.c
index c59f62626383..75f53c723186 100644
--- a/lib/ring/rte_ring.c
+++ b/lib/ring/rte_ring.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include <rte_telemetry.h>
 
 #include "rte_ring.h"
 #include "rte_ring_elem.h"
@@ -423,3 +424,42 @@ rte_ring_lookup(const char *name)
 
return r;
 }
+
+static void
+ring_walk(void (*func)(struct rte_ring *, void *), void *arg)
+{
+   struct rte_ring_list *ring_list;
+   struct rte_tailq_entry *tailq_entry;
+
+   ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+   rte_mcfg_tailq_read_lock();
+
+   TAILQ_FOREACH(tailq_entry, ring_list, next) {
+   (*func)((struct rte_ring *) tailq_entry->data, arg);
+   }
+
+   rte_mcfg_tailq_read_unlock();
+}
+
+static void
+ring_list_cb(struct rte_ring *r, void *arg)
+{
+   struct rte_tel_data *d = (struct rte_tel_data *)arg;
+
+   rte_tel_data_add_array_string(d, r->name);
+}
+
+static int
+ring_handle_list(const char *cmd __rte_unused,
+   const char *params __rte_unused, struct rte_tel_data *d)
+{
+   rte_tel_data_start_array(d, RTE_TEL_STRING_VAL);
+   ring_walk(ring_list_cb, d);
+   return 0;
+}
+
+RTE_INIT(ring_init_telemetry)
+{
+   rte_telemetry_register_cmd("/ring/list", ring_handle_list,
+   "Returns list of available rings. Takes no parameters");
+}
-- 
2.30.0



[PATCH v8 0/2] add telemetry cmds for ring

2024-02-19 Thread Jie Hai
This patch set adds telemetry commands to list rings and dump the
information of a ring by its name.

v1->v2:
1. Add space after "switch".
2. Fix wrong strlen parameter.

v2->v3:
1. Remove prefix "rte_" for static function.
2. Add Acked-by Konstantin Ananyev for PATCH 1.
3. Introduce functions to return strings instead of copying strings.
4. Check pointer to memzone of ring.
5. Remove redundant variable.
6. Hold the lock when accessing ring data.

v3->v4:
1. Update changelog according to reviews of Honnappa Nagarahalli.
2. Add Reviewed-by Honnappa Nagarahalli.
3. Correct grammar in help information.
4. Correct spell warning on "te" reported by checkpatch.pl.
5. Use ring_walk() to query ring info instead of rte_ring_lookup().
6. Fix the type definition of the 'flags' field of rte_ring to match its usage.
7. Use rte_tel_data_add_dict_uint_hex instead of rte_tel_data_add_dict_u64
   for mask and flags.

v4->v5:
1. Add Acked-by Konstantin Ananyev and Chengwen Feng.
2. Add ABI change explanation for commit message of patch 1/3.

v5->v6:
1. Add Acked-by Morten Brørup.
2. Fix incorrect reference of commit.

v6->v7:
1. Remove prod/consumer head/tail info.

v7->v8:
1. Drop patch 1/3 and related code.

Jie Hai (2):
  ring: add telemetry cmd to list rings
  ring: add telemetry cmd for ring info

 lib/ring/meson.build |   1 +
 lib/ring/rte_ring.c  | 135 +++
 2 files changed, 136 insertions(+)

-- 
2.30.0



[PATCH v7 0/2] fix parsing of VLAN metadata

2024-02-19 Thread Alan Elder
The previous netvsc code incorrectly parsed the VLAN ID and priority.
If the 16 bits of VLAN ID and priority/CFI on the wire were
0123456789ABCDEF, the code parsed them as 456789ABCDEF3012. This patch
fixes the netvsc parsing code and adds common macros for extracting and
setting the parts of the VLAN tag.

Alan Elder (2):
  lib/net: fix parsing of VLAN metadata
  net/netvsc: fix parsing of VLAN metadata

---
v7:
* Split patches for lib and driver

v6:
* Line length can be 100 - un-split lines

v5:
* Move the VLAN parsing macros to rte_ether.h

v4:
* Make consistent with FreeBSD code

---
.mailmap |  1 +
 drivers/net/netvsc/hn_rxtx.c |  8 ++--
 lib/net/rte_ether.h  | 14 ++
 3 files changed, 21 insertions(+), 2 deletions(-)

-- 
2.25.1



[PATCH v7 1/2] lib/net: fix parsing of VLAN metadata

2024-02-19 Thread Alan Elder
Add common macros for extracting parts of the VLAN tag.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: sthem...@microsoft.com
Cc: sta...@dpdk.org

Signed-off-by: Alan Elder 
---
v7:
* Split patches for lib and driver

v6:
* Line length can be 100 - un-split lines

v5:
* Move the VLAN parsing macros to rte_ether.h

v4:
* Make consistent with FreeBSD code

---
 .mailmap|  1 +
 lib/net/rte_ether.h | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/.mailmap b/.mailmap
index de339562f4..08fce0c472 100644
--- a/.mailmap
+++ b/.mailmap
@@ -33,6 +33,7 @@ Alain Leon 
 Alan Brady 
 Alan Carew 
 Alan Dewar  
+Alan Elder 
 Alan Liu 
 Alan Winkowski 
 Alejandro Lucero 
diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index ce073ea818..75285bdd12 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -46,6 +46,20 @@ extern "C" {
 
 #define RTE_ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */
 
+/* VLAN header fields */
+#define RTE_VLAN_DEI_SHIFT 12
+#define RTE_VLAN_PRI_SHIFT 13
+#define RTE_VLAN_PRI_MASK  0xe000 /* Priority Code Point */
+#define RTE_VLAN_DEI_MASK  0x1000 /* Drop Eligible Indicator */
+#define RTE_VLAN_ID_MASK   0x0fff /* VLAN Identifier */
+
+#define RTE_VLAN_TCI_ID(vlan_tci)  ((vlan_tci) & RTE_VLAN_ID_MASK)
+#define RTE_VLAN_TCI_PRI(vlan_tci) (((vlan_tci) & RTE_VLAN_PRI_MASK) >> RTE_VLAN_PRI_SHIFT)
+#define RTE_VLAN_TCI_DEI(vlan_tci) (((vlan_tci) & RTE_VLAN_DEI_MASK) >> RTE_VLAN_DEI_SHIFT)
+#define RTE_VLAN_TCI_MAKE(id, pri, dei)	((id) | \
+					 ((pri) << RTE_VLAN_PRI_SHIFT) | \
+					 ((dei) << RTE_VLAN_DEI_SHIFT))
+
+
 /**
  * Ethernet address:
  * A universally administered address is uniquely assigned to a device by its
-- 
2.25.1

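As a quick illustration of how the helpers added above compose, here is
a hedged sketch; the TCI value is chosen for illustration only:

#include <stdint.h>
#include <stdio.h>

#include <rte_ether.h> /* RTE_VLAN_TCI_* helpers added by this patch */

int
main(void)
{
	/* TCI 0xA7B1: priority 5 (bits 15-13), DEI 0 (bit 12), ID 0x7B1 */
	uint16_t tci = 0xA7B1;

	uint16_t id  = RTE_VLAN_TCI_ID(tci);   /* 0x07B1 */
	uint16_t pri = RTE_VLAN_TCI_PRI(tci);  /* 5 */
	uint16_t dei = RTE_VLAN_TCI_DEI(tci);  /* 0 */

	/* Recombining the parts yields the original TCI. */
	printf("%#x\n", RTE_VLAN_TCI_MAKE(id, pri, dei)); /* prints 0xa7b1 */

	return 0;
}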


[PATCH v7 2/2] net/netvsc: fix parsing of VLAN metadata

2024-02-19 Thread Alan Elder
The previous code incorrectly parsed the VLAN ID and priority.
If the 16 bits of VLAN ID and priority/CFI on the wire were
0123456789ABCDEF, the code parsed them as 456789ABCDEF3012. There
were macros defined to handle this conversion but they were not
used.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: sthem...@microsoft.com
Cc: sta...@dpdk.org

Signed-off-by: Alan Elder 
---
v7:
* Split into two patches, one for lib and one for driver

v6:
* Line length can be 100 - un-split lines

v5:
* Move the VLAN parsing macros to rte_ether.h

v4:
* Make consistent with FreeBSD code

---
 drivers/net/netvsc/hn_rxtx.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index e4f5015aa3..9bf1ec5509 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -612,7 +612,9 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
			   RTE_PTYPE_L4_MASK);
 
	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
-		m->vlan_tci = info->vlan_info;
+		m->vlan_tci = RTE_VLAN_TCI_MAKE(NDIS_VLAN_INFO_ID(info->vlan_info),
+						NDIS_VLAN_INFO_PRI(info->vlan_info),
+						NDIS_VLAN_INFO_CFI(info->vlan_info));
m->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN;
 
/* NDIS always strips tag, put it back if necessary */
@@ -1332,7 +1334,9 @@ static void hn_encap(struct rndis_packet_msg *pkt,
if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
  NDIS_PKTINFO_TYPE_VLAN);
-   *pi_data = m->vlan_tci;
+   *pi_data = NDIS_VLAN_INFO_MAKE(RTE_VLAN_TCI_ID(m->vlan_tci),
+  RTE_VLAN_TCI_PRI(m->vlan_tci),
+  RTE_VLAN_TCI_DEI(m->vlan_tci));
}
 
if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
-- 
2.25.1



Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 2:44 AM, Rushil Gupta wrote:
> This was causing failures for testpmd runs (for queues >= 15),
> presumably due to flooding of logs caused by the descriptor ring being
> overwritten.
> 
> Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Rushil Gupta 
> Reviewed-by: Joshua Washington 
> ---
>  drivers/net/gve/gve_tx_dqo.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c
> index 1a8eb96ea9..30a1455b20 100644
> --- a/drivers/net/gve/gve_tx_dqo.c
> +++ b/drivers/net/gve/gve_tx_dqo.c
> @@ -116,7 +116,7 @@ gve_tx_burst_dqo(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>   first_sw_id = sw_id;
>   do {
>   if (sw_ring[sw_id] != NULL)
> - PMD_DRV_LOG(ERR, "Overwriting an entry in sw_ring");
> + PMD_DRV_LOG(DEBUG, "Overwriting an entry in sw_ring");
>  
>   txd = &txr[tx_id];
>   sw_ring[sw_id] = tx_pkt;

Hi Rushil,

I will continue with this patch, BUT
logging in the datapath has potential problems like this; it may also
have a performance impact even when the log is not printed, because of
the additional check in the log function.

For the datapath, you can prefer one of the following (see the sketch
after this list):
- Add log macros controlled by the RTE_ETHDEV_DEBUG_RX & RTE_ETHDEV_DEBUG_TX
flags
- Or use the RTE_LOG_DP() macro, which is compiled out if the default log
level is lower than the level the log uses

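A minimal hedged sketch of the second option follows; the GVE logtype
name is an assumption (the driver must have a registered logtype with a
matching RTE_LOGTYPE_GVE definition):

#include <rte_log.h>

/* RTE_LOG_DP() is compiled out entirely when RTE_LOG_DP_LEVEL (which
 * defaults to RTE_LOG_INFO) is lower than the level used, so this
 * DEBUG-level message adds no cost to a default build.
 */
static inline void
log_sw_ring_overwrite(void)
{
	RTE_LOG_DP(DEBUG, GVE, "Overwriting an entry in sw_ring\n");
}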

Another perspective on the logs: when the end user observes this flood
of logs, what can she do?
Can she change some driver arguments or environment variables to fix the
issue? If not, what is the point of the log?
If this is a condition that can occur dynamically based on received
traffic and the user has no control over it, maybe the driver should
handle the error without a log?
And if this is a log for the driver developer, perhaps it should be an
assert or similar that is disabled in the release build?


RE: [EXTERNAL] Re: [PATCH v6] net/netvsc: fix parsing of VLAN metadata

2024-02-19 Thread Alan Elder
On 2/16/2024 11:40 AM, Ferruh Yigit:
> I missed v6 but put some comment on v5, briefly can you please split the 
> patch for lib/net and driver?

I've tried to split the patch - hopefully got the formatting right - please let 
me know if not (it's my first time submitting a patch and I don't have all the 
tooling set up)


[RFC v2 0/5] Lcore variables

2024-02-19 Thread Mattias Rönnblom
This RFC presents a new API <rte_lcore_var.h> for static per-lcore id
data allocation.

Please refer to the <rte_lcore_var.h> API documentation for both a
rationale for this new API, and a comparison to the alternatives
available.

The adoption of this API would affect many different DPDK modules, but
the author updated only a few, mostly to serve as examples in this
RFC, and to iron out some, but surely not all, wrinkles in the API.

The question on how to best allocate static per-lcore memory has been
up several times on the dev mailing list, for example in the thread on
"random: use per lcore state" RFC by Stephen Hemminger.

Lcore variables are surely not the answer to all your per-lcore-data
needs, since it only allows for more-or-less static allocation. In the
author's opinion, it does however provide a reasonably simple and
clean and seemingly very much performant solution to a real problem.

One thing that is unclear to the author is how this API relates to a
potential future per-lcore dynamic allocator (e.g., a per-lcore heap).

Contrary to what the version.map edit suggests, this RFC is not meant
for a proposal for DPDK 24.03.

Mattias Rönnblom (5):
  eal: add static per-lcore memory allocation facility
  eal: add lcore variable test suite
  random: keep PRNG state in lcore variable
  power: keep per-lcore state in lcore variable
  service: keep per-lcore state in lcore variable

 app/test/meson.build  |   1 +
 app/test/test_lcore_var.c | 408 ++
 config/rte_config.h   |   1 +
 doc/api/doxy-api-index.md |   1 +
 lib/eal/common/eal_common_lcore_var.c |  82 ++
 lib/eal/common/meson.build|   1 +
 lib/eal/common/rte_random.c   |  30 +-
 lib/eal/common/rte_service.c  | 119 
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_lcore_var.h   | 374 +++
 lib/eal/version.map   |   4 +
 lib/power/rte_power_pmd_mgmt.c|  27 +-
 12 files changed, 973 insertions(+), 76 deletions(-)
 create mode 100644 app/test/test_lcore_var.c
 create mode 100644 lib/eal/common/eal_common_lcore_var.c
 create mode 100644 lib/eal/include/rte_lcore_var.h

-- 
2.34.1



[RFC v2 3/5] random: keep PRNG state in lcore variable

2024-02-19 Thread Mattias Rönnblom
Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
same state in a more cache-friendly lcore variable.

Signed-off-by: Mattias Rönnblom 
---
 lib/eal/common/rte_random.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 7709b8f2c6..af9fffd81b 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include <rte_lcore_var.h>
 #include 
 
 struct rte_rand_state {
@@ -19,14 +20,12 @@ struct rte_rand_state {
uint64_t z3;
uint64_t z4;
uint64_t z5;
-   RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
 
-/* One instance each for every lcore id-equipped thread, and one
- * additional instance to be shared by all others threads (i.e., all
- * unregistered non-EAL threads).
- */
-static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
+RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
+
+/* instance to be shared by all unregistered non-EAL threads */
+static struct rte_rand_state unregistered_rand_state __rte_cache_aligned;
 
 static uint32_t
 __rte_rand_lcg32(uint32_t *seed)
@@ -85,8 +84,14 @@ rte_srand(uint64_t seed)
unsigned int lcore_id;
 
/* add lcore_id to seed to avoid having the same sequence */
-   for (lcore_id = 0; lcore_id < RTE_DIM(rand_states); lcore_id++)
-   __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
+   for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+   struct rte_rand_state *lcore_state =
+   RTE_LCORE_VAR_LCORE_PTR(lcore_id, rand_state);
+
+   __rte_srand_lfsr258(seed + lcore_id, lcore_state);
+   }
+
+   __rte_srand_lfsr258(seed + lcore_id, &unregistered_rand_state);
 }
 
 static __rte_always_inline uint64_t
@@ -124,11 +129,10 @@ struct rte_rand_state *__rte_rand_get_state(void)
 
idx = rte_lcore_id();
 
-   /* last instance reserved for unregistered non-EAL threads */
if (unlikely(idx == LCORE_ID_ANY))
-   idx = RTE_MAX_LCORE;
+   return &unregistered_rand_state;
 
-   return &rand_states[idx];
+   return RTE_LCORE_VAR_PTR(rand_state);
 }
 
 uint64_t
@@ -228,6 +232,8 @@ RTE_INIT(rte_rand_init)
 {
uint64_t seed;
 
+   RTE_LCORE_VAR_ALLOC(rand_state);
+
seed = __rte_random_initial_seed();
 
rte_srand(seed);
-- 
2.34.1



[RFC v2 4/5] power: keep per-lcore state in lcore variable

2024-02-19 Thread Mattias Rönnblom
Signed-off-by: Mattias Rönnblom 
---
 lib/power/rte_power_pmd_mgmt.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 591fc69f36..bb20e564de 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -5,6 +5,7 @@
 #include 
 
 #include 
+#include <rte_lcore_var.h>
 #include 
 #include 
 #include 
@@ -68,8 +69,8 @@ struct pmd_core_cfg {
/**< Number of queues ready to enter power optimized state */
uint64_t sleep_target;
/**< Prevent a queue from triggering sleep multiple times */
-} __rte_cache_aligned;
-static struct pmd_core_cfg lcore_cfgs[RTE_MAX_LCORE];
+};
+static RTE_LCORE_VAR_HANDLE(struct pmd_core_cfg, lcore_cfgs);
 
 static inline bool
 queue_equal(const union queue *l, const union queue *r)
@@ -252,12 +253,11 @@ clb_multiwait(uint16_t port_id __rte_unused, uint16_t 
qidx __rte_unused,
struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
uint16_t max_pkts __rte_unused, void *arg)
 {
-   const unsigned int lcore = rte_lcore_id();
struct queue_list_entry *queue_conf = arg;
struct pmd_core_cfg *lcore_conf;
const bool empty = nb_rx == 0;
 
-   lcore_conf = &lcore_cfgs[lcore];
+   lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs);
 
/* early exit */
if (likely(!empty))
@@ -317,13 +317,12 @@ clb_pause(uint16_t port_id __rte_unused, uint16_t qidx 
__rte_unused,
struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
uint16_t max_pkts __rte_unused, void *arg)
 {
-   const unsigned int lcore = rte_lcore_id();
struct queue_list_entry *queue_conf = arg;
struct pmd_core_cfg *lcore_conf;
const bool empty = nb_rx == 0;
uint32_t pause_duration = rte_power_pmd_mgmt_get_pause_duration();
 
-   lcore_conf = &lcore_cfgs[lcore];
+   lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs);
 
if (likely(!empty))
/* early exit */
@@ -358,9 +357,8 @@ clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx 
__rte_unused,
struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
uint16_t max_pkts __rte_unused, void *arg)
 {
-   const unsigned int lcore = rte_lcore_id();
const bool empty = nb_rx == 0;
-   struct pmd_core_cfg *lcore_conf = &lcore_cfgs[lcore];
+   struct pmd_core_cfg *lcore_conf = RTE_LCORE_VAR_PTR(lcore_cfgs);
struct queue_list_entry *queue_conf = arg;
 
if (likely(!empty)) {
@@ -518,7 +516,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, 
uint16_t port_id,
goto end;
}
 
-   lcore_cfg = &lcore_cfgs[lcore_id];
+   lcore_cfg = RTE_LCORE_VAR_LCORE_PTR(lcore_id, lcore_cfgs);
 
/* check if other queues are stopped as well */
ret = cfg_queues_stopped(lcore_cfg);
@@ -619,7 +617,7 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
}
 
/* no need to check queue id as wrong queue id would not be enabled */
-   lcore_cfg = &lcore_cfgs[lcore_id];
+   lcore_cfg = RTE_LCORE_VAR_LCORE_PTR(lcore_id, lcore_cfgs);
 
/* check if other queues are stopped as well */
ret = cfg_queues_stopped(lcore_cfg);
@@ -772,10 +770,13 @@ RTE_INIT(rte_power_ethdev_pmgmt_init) {
size_t i;
int j;
 
+   RTE_LCORE_VAR_ALLOC(lcore_cfgs);
+
/* initialize all tailqs */
-   for (i = 0; i < RTE_DIM(lcore_cfgs); i++) {
-   struct pmd_core_cfg *cfg = &lcore_cfgs[i];
-   TAILQ_INIT(&cfg->head);
+   for (i = 0; i < RTE_MAX_LCORE; i++) {
+   struct pmd_core_cfg *lcore_cfg =
+   RTE_LCORE_VAR_LCORE_PTR(i, lcore_cfgs);
+   TAILQ_INIT(&lcore_cfg->head);
}
 
/* initialize config defaults */
-- 
2.34.1



[RFC v2 2/5] eal: add lcore variable test suite

2024-02-19 Thread Mattias Rönnblom
RFC v2:
 * Improve alignment-related test coverage.

Signed-off-by: Mattias Rönnblom 
---
 app/test/meson.build  |   1 +
 app/test/test_lcore_var.c | 408 ++
 2 files changed, 409 insertions(+)
 create mode 100644 app/test/test_lcore_var.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 6389ae83ee..93412cce51 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -101,6 +101,7 @@ source_file_deps = {
 'test_ipsec_sad.c': ['ipsec'],
 'test_kvargs.c': ['kvargs'],
'test_latencystats.c': ['ethdev', 'latencystats', 'metrics'] + sample_packet_forward_deps,
+'test_lcore_var.c': [],
 'test_lcores.c': [],
 'test_link_bonding.c': ['ethdev', 'net_bond',
 'net'] + packet_burst_generator_deps + virtual_pmd_deps,
diff --git a/app/test/test_lcore_var.c b/app/test/test_lcore_var.c
new file mode 100644
index 00..310d32e10d
--- /dev/null
+++ b/app/test/test_lcore_var.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Ericsson AB
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "test.h"
+
+#define MIN_LCORES 2
+
+RTE_LCORE_VAR_HANDLE(int, test_int);
+RTE_LCORE_VAR_HANDLE(char, test_char);
+RTE_LCORE_VAR_HANDLE(long, test_long_sized);
+RTE_LCORE_VAR_HANDLE(short, test_short);
+RTE_LCORE_VAR_HANDLE(long, test_long_sized_aligned);
+
+struct int_checker_state {
+   int old_value;
+   int new_value;
+   bool success;
+};
+
+static bool
+rand_bool(void)
+{
+   return rte_rand() & 1;
+}
+
+static void
+rand_blk(void *blk, size_t size)
+{
+   size_t i;
+
+   for (i = 0; i < size; i++)
+   ((unsigned char *)blk)[i] = (unsigned char)rte_rand();
+}
+
+static bool
+is_ptr_aligned(const void *ptr, size_t align)
+{
+   return ptr != NULL ? (uintptr_t)ptr % align == 0 : false;
+}
+
+static int
+check_int(void *arg)
+{
+   struct int_checker_state *state = arg;
+
+   int *ptr = RTE_LCORE_VAR_PTR(test_int);
+
+   bool naturally_aligned = is_ptr_aligned(ptr, sizeof(int));
+
+   bool equal;
+
+   if (rand_bool())
+   equal = RTE_LCORE_VAR_GET(test_int) == state->old_value;
+   else
+   equal = *(RTE_LCORE_VAR_PTR(test_int)) == state->old_value;
+
+   state->success = equal && naturally_aligned;
+
+   if (rand_bool())
+   RTE_LCORE_VAR_SET(test_int, state->new_value);
+   else
+   *ptr = state->new_value;
+
+   return 0;
+}
+
+RTE_LCORE_VAR_INIT(test_int);
+RTE_LCORE_VAR_INIT(test_char);
+RTE_LCORE_VAR_INIT_SIZE(test_long_sized, 32);
+RTE_LCORE_VAR_INIT(test_short);
+RTE_LCORE_VAR_INIT_SIZE_ALIGN(test_long_sized_aligned, sizeof(long),
+ RTE_CACHE_LINE_SIZE);
+
+static int
+test_int_lvar(void)
+{
+   unsigned int lcore_id;
+
+   struct int_checker_state states[RTE_MAX_LCORE] = {};
+
+   RTE_LCORE_FOREACH_WORKER(lcore_id) {
+   struct int_checker_state *state = &states[lcore_id];
+
+   state->old_value = (int)rte_rand();
+   state->new_value = (int)rte_rand();
+
+   RTE_LCORE_VAR_LCORE_SET(lcore_id, test_int, state->old_value);
+   }
+
+   RTE_LCORE_FOREACH_WORKER(lcore_id)
+   rte_eal_remote_launch(check_int, &states[lcore_id], lcore_id);
+
+   rte_eal_mp_wait_lcore();
+
+   RTE_LCORE_FOREACH_WORKER(lcore_id) {
+   struct int_checker_state *state = &states[lcore_id];
+
+   TEST_ASSERT(state->success, "Unexpected value "
+   "encountered on lcore %d", lcore_id);
+
+   TEST_ASSERT_EQUAL(state->new_value,
+ RTE_LCORE_VAR_LCORE_GET(lcore_id, test_int),
+ "Lcore %d failed to update int", lcore_id);
+   }
+
+   /* take the opportunity to test the foreach macro */
+   int *v;
+   lcore_id = 0;
+   RTE_LCORE_VAR_FOREACH_VALUE(v, test_int) {
+   TEST_ASSERT_EQUAL(states[lcore_id].new_value, *v,
+ "Unexpected value on lcore %d during "
+ "iteration", lcore_id);
+   lcore_id++;
+   }
+
+   return TEST_SUCCESS;
+}
+
+static int
+test_sized_alignment(void)
+{
+   long *v;
+
+   RTE_LCORE_VAR_FOREACH_VALUE(v, test_long_sized) {
+   TEST_ASSERT(is_ptr_aligned(v, alignof(long)),
+   "Type-derived alignment failed");
+   }
+
+   RTE_LCORE_VAR_FOREACH_VALUE(v, test_long_sized_aligned) {
+   TEST_ASSERT(is_ptr_aligned(v, RTE_CACHE_LINE_SIZE),
+   "Explicit alignment failed");
+   }
+
+   return TEST_SUCCESS;
+}
+
+/* private, larger, struct */
+#define TEST_STRUCT_DATA_SIZE 1234
+
+struct test_struct {
+   uint8_t data[TEST_STRUCT_DATA_SIZE];
+};
+
+static RTE_LCORE_VAR_HANDLE(char, before_struct);
+static RTE_LCORE_V

[RFC v2 1/5] eal: add static per-lcore memory allocation facility

2024-02-19 Thread Mattias Rönnblom
Introduce DPDK per-lcore id variables, or lcore variables for short.

An lcore variable has one value for every current and future lcore
id-equipped thread.

The primary <rte_lcore_var.h> use case is for statically allocating
small chunks of often-used data, which is related logically, but where
there are performance benefits to reap from having updates being local
to an lcore.

Lcore variables are similar to thread-local storage (TLS, e.g., C11
_Thread_local), but decouple the values' lifetime from that of the
threads.

Lcore variables are also similar in functionality to the FreeBSD
kernel's DPCPU_*() family of macros and the associated build-time
machinery. DPCPU uses linker scripts, which effectively prevents the
reuse of its otherwise seemingly viable approach.

The currently prevailing way to solve the same problem as lcore
variables is to keep a module's per-lcore data as an RTE_MAX_LCORE-sized
array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit of lcore
variables over this approach is that data related to the same lcore is
now close (spatially, in memory), rather than data used by the same
module, which in turn avoids excessive use of padding and the pollution
of caches with unused data.

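To make the intended usage pattern concrete, here is a minimal hedged
sketch based on the macros this patch introduces; the module and struct
names are hypothetical:

#include <stdint.h>

#include <rte_common.h>
#include <rte_lcore_var.h>

struct mod_stats {
	uint64_t pkts;
	uint64_t errs;
};

/* One handle, one value per lcore id; no RTE_MAX_LCORE-sized array and
 * no cache-line padding or RTE_CACHE_GUARD needed.
 */
static RTE_LCORE_VAR_HANDLE(struct mod_stats, mod_stats);

RTE_INIT(mod_stats_init)
{
	RTE_LCORE_VAR_ALLOC(mod_stats);
}

static inline void
mod_stats_count_pkt(void)
{
	/* Resolves to the calling lcore's instance of the variable. */
	struct mod_stats *stats = RTE_LCORE_VAR_PTR(mod_stats);

	stats->pkts++;
}
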
RFC v2:
 * Use alignof to derive alignment requirements. (Morten Brørup)
 * Change name of FOREACH to make it distinct from <rte_lcore.h>'s
   *per-EAL-thread* RTE_LCORE_FOREACH(). (Morten Brørup)
 * Allow user-specified alignment, but limit max to cache line size.

Signed-off-by: Mattias Rönnblom 
---
 config/rte_config.h   |   1 +
 doc/api/doxy-api-index.md |   1 +
 lib/eal/common/eal_common_lcore_var.c |  82 ++
 lib/eal/common/meson.build|   1 +
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_lcore_var.h   | 374 ++
 lib/eal/version.map   |   4 +
 7 files changed, 464 insertions(+)
 create mode 100644 lib/eal/common/eal_common_lcore_var.c
 create mode 100644 lib/eal/include/rte_lcore_var.h

diff --git a/config/rte_config.h b/config/rte_config.h
index da265d7dd2..884482e473 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -30,6 +30,7 @@
 /* EAL defines */
 #define RTE_CACHE_GUARD_LINES 1
 #define RTE_MAX_HEAPS 32
+#define RTE_MAX_LCORE_VAR 1048576
 #define RTE_MAX_MEMSEG_LISTS 128
 #define RTE_MAX_MEMSEG_PER_LIST 8192
 #define RTE_MAX_MEM_MB_PER_LIST 32768
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index a6a768bd7c..bb06bb7ca1 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -98,6 +98,7 @@ The public API headers are grouped by topics:
   [interrupts](@ref rte_interrupts.h),
   [launch](@ref rte_launch.h),
   [lcore](@ref rte_lcore.h),
+  [lcore-variable](@ref rte_lcore_var.h),
   [per-lcore](@ref rte_per_lcore.h),
   [service cores](@ref rte_service.h),
   [keepalive](@ref rte_keepalive.h),
diff --git a/lib/eal/common/eal_common_lcore_var.c b/lib/eal/common/eal_common_lcore_var.c
new file mode 100644
index 00..dfd11cbd0b
--- /dev/null
+++ b/lib/eal/common/eal_common_lcore_var.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Ericsson AB
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include "eal_private.h"
+
+#define WARN_THRESHOLD 75
+
+/*
+ * Avoid using offset zero, since it would result in a NULL-value
+ * "handle" (offset) pointer, which in principle and per the API
+ * definition shouldn't be an issue, but may confuse some tools and
+ * users.
+ */
+#define INITIAL_OFFSET 1
+
+char rte_lcore_var[RTE_MAX_LCORE][RTE_MAX_LCORE_VAR] __rte_cache_aligned;
+
+static uintptr_t allocated = INITIAL_OFFSET;
+
+static void
+verify_allocation(uintptr_t new_allocated)
+{
+   static bool has_warned;
+
+   RTE_VERIFY(new_allocated < RTE_MAX_LCORE_VAR);
+
+   if (new_allocated > (WARN_THRESHOLD * RTE_MAX_LCORE_VAR) / 100 &&
+   !has_warned) {
+   EAL_LOG(WARNING, "Per-lcore data usage has exceeded %d%% "
+   "of the maximum capacity (%d bytes)", WARN_THRESHOLD,
+   RTE_MAX_LCORE_VAR);
+   has_warned = true;
+   }
+}
+
+static void *
+lcore_var_alloc(size_t size, size_t align)
+{
+   uintptr_t new_allocated = RTE_ALIGN_CEIL(allocated, align);
+
+   void *offset = (void *)new_allocated;
+
+   new_allocated += size;
+
+   verify_allocation(new_allocated);
+
+   allocated = new_allocated;
+
+   EAL_LOG(DEBUG, "Allocated %"PRIuPTR" bytes of per-lcore data with a "
+   "%"PRIuPTR"-byte alignment", size, align);
+
+   return offset;
+}
+
+void *
+rte_lcore_var_alloc(size_t size, size_t align)
+{
+   /* Having the per-lcore buffer size aligned on cache lines,
+    * as well as having the base pointer aligned on cache line
+    * size, assures that aligned offsets also translate to
+    * aligned pointers across all values.
+    */
+   RTE_BUILD_BUG_ON(RTE_MAX_LCORE_

[RFC v2 5/5] service: keep per-lcore state in lcore variable

2024-02-19 Thread Mattias Rönnblom
Signed-off-by: Mattias Rönnblom 
---
 lib/eal/common/rte_service.c | 119 ---
 1 file changed, 68 insertions(+), 51 deletions(-)

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d959c91459..de205c5da5 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include <rte_lcore_var.h>
 #include 
 #include 
 #include 
@@ -75,7 +76,7 @@ struct core_state {
 
 static uint32_t rte_service_count;
 static struct rte_service_spec_impl *rte_services;
-static struct core_state *lcore_states;
+static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states);
 static uint32_t rte_service_library_initialized;
 
 int32_t
@@ -101,11 +102,12 @@ rte_service_init(void)
goto fail_mem;
}
 
-   lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
-   sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
-   if (!lcore_states) {
-   EAL_LOG(ERR, "error allocating core states array");
-   goto fail_mem;
+   if (lcore_states == NULL)
+   RTE_LCORE_VAR_ALLOC(lcore_states);
+   else {
+   struct core_state *cs;
+   RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+   memset(cs, 0, sizeof(struct core_state));
}
 
int i;
@@ -122,7 +124,6 @@ rte_service_init(void)
return 0;
 fail_mem:
rte_free(rte_services);
-   rte_free(lcore_states);
return -ENOMEM;
 }
 
@@ -136,7 +137,6 @@ rte_service_finalize(void)
rte_eal_mp_wait_lcore();
 
rte_free(rte_services);
-   rte_free(lcore_states);
 
rte_service_library_initialized = 0;
 }
@@ -286,7 +286,6 @@ rte_service_component_register(const struct rte_service_spec *spec,
 int32_t
 rte_service_component_unregister(uint32_t id)
 {
-   uint32_t i;
struct rte_service_spec_impl *s;
SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
@@ -294,9 +293,10 @@ rte_service_component_unregister(uint32_t id)
 
s->internal_flags &= ~(SERVICE_F_REGISTERED);
 
+   struct core_state *cs;
/* clear the run-bit in all cores */
-   for (i = 0; i < RTE_MAX_LCORE; i++)
-   lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+   RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+   cs->service_mask &= ~(UINT64_C(1) << id);
 
memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -454,7 +454,10 @@ rte_service_may_be_active(uint32_t id)
return -EINVAL;
 
for (i = 0; i < lcore_count; i++) {
-   if (lcore_states[ids[i]].service_active_on_lcore[id])
+   struct core_state *cs =
+   RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states);
+
+   if (cs->service_active_on_lcore[id])
return 1;
}
 
@@ -464,7 +467,7 @@ rte_service_may_be_active(uint32_t id)
 int32_t
 rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 {
-   struct core_state *cs = &lcore_states[rte_lcore_id()];
+   struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states);
struct rte_service_spec_impl *s;
 
SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
@@ -486,8 +489,7 @@ service_runner_func(void *arg)
 {
RTE_SET_USED(arg);
uint8_t i;
-   const int lcore = rte_lcore_id();
-   struct core_state *cs = &lcore_states[lcore];
+   struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states);
 
	rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst);
 
@@ -533,13 +535,16 @@ service_runner_func(void *arg)
 int32_t
 rte_service_lcore_may_be_active(uint32_t lcore)
 {
-   if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
+   struct core_state *cs =
+   RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
+   if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)
return -EINVAL;
 
/* Load thread_active using ACQUIRE to avoid instructions dependent on
 * the result being re-ordered before this load completes.
 */
-   return rte_atomic_load_explicit(&lcore_states[lcore].thread_active,
+   return rte_atomic_load_explicit(&cs->thread_active,
   rte_memory_order_acquire);
 }
 
@@ -547,9 +552,11 @@ int32_t
 rte_service_lcore_count(void)
 {
int32_t count = 0;
-   uint32_t i;
-   for (i = 0; i < RTE_MAX_LCORE; i++)
-   count += lcore_states[i].is_service_core;
+
+   struct core_state *cs;
+   RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+   count += cs->is_service_core;
+
return count;
 }
 
@@ -566,7 +573,8 @@ rte_service_lcore_list(uint32_t array[], uint32_t n)
uint32_t i;
uint32_t idx = 0;
for (i = 0; i < RTE_MAX_LCORE; i++) {
-   struct core_state *cs = &lcore_states[i];
+ 

Re: [PATCH v8 0/2] add telemetry cmds for ring

2024-02-19 Thread Thomas Monjalon
> Jie Hai (2):
>   ring: add telemetry cmd to list rings
>   ring: add telemetry cmd for ring info

Applied, thanks.





[PATCH v12] net/iavf: add diagnostic support in TX path

2024-02-19 Thread Mingjin Ye
Implemented a Tx wrapper to perform a thorough check on mbufs,
categorizing and counting invalid cases by type for diagnostic
purposes. The count of invalid cases is accessible through xstats_get.

Also, the devarg option "mbuf_check" was introduced to configure the
diagnostic parameters to enable the appropriate diagnostic features.

supported cases: mbuf, size, segment, offload.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to hw spec.
 3. segment: check that the number of mbuf segments does not exceed hw limits.
 4. offload: check for any unsupported offload flag.

parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
eg: dpdk-testpmd -a :81:01.0,mbuf_check=[mbuf,size] -- -i

Signed-off-by: Mingjin Ye 
---
v2: Remove call chain.
---
v3: Optimisation implementation.
---
v4: Fix Windows os compilation error.
---
v5: Split Patch.
---
v6: remove strict.
---
v9: Modify the description document.
---
v10: Modify vf rst document.
---
v11: modify comment log.
---
v12: Fix buggy logs and add necessary notes.
---
 doc/guides/nics/intel_vf.rst   | 11 
 drivers/net/iavf/iavf.h| 11 
 drivers/net/iavf/iavf_ethdev.c | 79 +++
 drivers/net/iavf/iavf_rxtx.c   | 98 ++
 drivers/net/iavf/iavf_rxtx.h   |  2 +
 5 files changed, 201 insertions(+)

diff --git a/doc/guides/nics/intel_vf.rst b/doc/guides/nics/intel_vf.rst
index ce96c2e1f8..b84ea214d4 100644
--- a/doc/guides/nics/intel_vf.rst
+++ b/doc/guides/nics/intel_vf.rst
@@ -111,6 +111,17 @@ For more detail on SR-IOV, please refer to the following documents:
     by setting the ``devargs`` parameter like ``-a 18:01.0,no-poll-on-link-down=1``
     when IAVF is backed by an Intel\ |reg| E810 device or an Intel\ |reg| 700 Series Ethernet device.
 
+When IAVF is backed by an Intel\ |reg| E810 device or an Intel\ |reg| 700 Series Ethernet device,
+set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
+``-a 18:01.0,mbuf_check=<case>`` or ``-a 18:01.0,mbuf_check=[<case1>,<case2>]``. Also,
+``xstats_get`` can be used to get the error counts, which are collected in ``tx_mbuf_error_packets``
+xstats. For example, ``testpmd> show port xstats all``. Supported cases:
+
+*   mbuf: Check for corrupted mbuf.
+*   size: Check min/max packet length according to hw spec.
+*   segment: Check that the number of mbuf segments does not exceed hw limits.
+*   offload: Check for any unsupported offload flag.
+
 The PCIE host-interface of Intel Ethernet Switch FM1 Series VF infrastructure
 ^
 
diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index ab24cb02c3..824ae4aa02 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -114,9 +114,14 @@ struct iavf_ipsec_crypto_stats {
} ierrors;
 };
 
+struct iavf_mbuf_stats {
+   uint64_t tx_pkt_errors;
+};
+
 struct iavf_eth_xstats {
struct virtchnl_eth_stats eth_stats;
struct iavf_ipsec_crypto_stats ips_stats;
+   struct iavf_mbuf_stats mbuf_stats;
 };
 
 /* Structure that defines a VSI, associated with a adapter. */
@@ -310,6 +315,7 @@ struct iavf_devargs {
uint32_t watchdog_period;
int auto_reset;
int no_poll_on_link_down;
+   uint64_t mbuf_check;
 };
 
 struct iavf_security_ctx;
@@ -353,6 +359,11 @@ enum iavf_tx_burst_type {
IAVF_TX_AVX512_CTX_OFFLOAD,
 };
 
+#define IAVF_MBUF_CHECK_F_TX_MBUF    (1ULL << 0)
+#define IAVF_MBUF_CHECK_F_TX_SIZE    (1ULL << 1)
+#define IAVF_MBUF_CHECK_F_TX_SEGMENT (1ULL << 2)
+#define IAVF_MBUF_CHECK_F_TX_OFFLOAD (1ULL << 3)
+
 /* Structure to store private data for each VF instance. */
 struct iavf_adapter {
struct iavf_hw hw;
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 1fb876e827..3fb255d748 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -39,6 +40,7 @@
 #define IAVF_RESET_WATCHDOG_ARG    "watchdog_period"
 #define IAVF_ENABLE_AUTO_RESET_ARG "auto_reset"
 #define IAVF_NO_POLL_ON_LINK_DOWN_ARG "no-poll-on-link-down"
+#define IAVF_MBUF_CHECK_ARG   "mbuf_check"
 uint64_t iavf_timestamp_dynflag;
 int iavf_timestamp_dynfield_offset = -1;
 int rte_pmd_iavf_tx_lldp_dynfield_offset = -1;
@@ -49,6 +51,7 @@ static const char * const iavf_valid_args[] = {
IAVF_RESET_WATCHDOG_ARG,
IAVF_ENABLE_AUTO_RESET_ARG,
IAVF_NO_POLL_ON_LINK_DOWN_ARG,
+   IAVF_MBUF_CHECK_ARG,
NULL
 };
 
@@ -175,6 +178,7 @@ static const struct rte_iavf_xstats_name_off rte_iavf_stats_strings[] = {
{"tx_broadcast_packets", _OFF_OF(eth_stats.tx_broadcast)},
{"tx_dropped_packets", _OFF_OF(eth_stats.tx_discards)},
{"tx_error_packets", _OFF_OF(eth_stats.tx_errors)},
+   {"tx_mbuf_error_packets", _OFF_OF(mbuf_stats.t

Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 2:44 AM, Rushil Gupta wrote:
> This was causing failures for testpmd runs (for queues >= 15),
> presumably due to flooding of logs caused by the descriptor ring being
> overwritten.
> 
> Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Rushil Gupta 
> Reviewed-by: Joshua Washington 
>

Squashed into relevant commit in next-net, thanks.


Re: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Bruce Richardson
On Sun, Feb 18, 2024 at 11:05:35AM +0100, Morten Brørup wrote:
> > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > Sent: Saturday, 17 February 2024 19.11
> > 
> > On Sat, 17 Feb 2024 19:02:30 +0100
> > Morten Brørup  wrote:
> > 
> > > Not formally... it follows the official DPDK Coding Style [1].
> > >
> > > [1]:
> > https://doc.dpdk.org/guides/contributing/coding_style.html#general
> > >
> > > > Should be:
> > > >
> > > > if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
> > > >     (ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
> > > > 	goto clean_txd;
> > >
> > > This indentation style is mentioned as an alternative in the guide.
> > But the example in the guide also uses two tabs for a similar long
> > comparison.
> > >
> > > Personally, I also prefer the style suggested by Stephen, so we might
> > want to update the Coding Style. ;-)
> > 
> > 
> > The two tabs is an Intel thing, and I prefer the kernel, line up the
> > conditional style.
> 
> I prefer 4 space indentation, which is sufficient to notice the indentation. 
> 8 spaces seems overkill to me, and quickly makes the lines too long.
> With the editor configured to show tab as 4 spaces, the kernel alignment 
> style ends up with the same indentation for the condition and the code block:
> 
> if (a &&
>     b)
>     ctr++;
> 
> Whereas with the "tab as 4 spaces" editor configuration, the double 
> indentation style clearly separates the continued condition from code block:
> 
> if (a &&
>         b)
>     ctr++;
> 

These two above are the main reasons I much prefer the double indent on
continuation, though I'd also add a third one: it means we don't have a mix
of tabs and spaces for indentation.

However, as stated already indent can be a matter of taste, and there will
be some disagreement about it. The existing coding standards document what
was done in the code base when they were written, and I don't think we
should look to change them. It's a bit annoying having 2 standards for
continuation rather than 1, but it's not exactly a free-for-all, and it's
not something that applies to every line, only to a small subset.

> On the other hand, complex conditions are easier readable when aligning 
> logically instead of by a fixed number of tabs, e.g.:
> 
> if (a |
>     (b &
>      (c ^ d)) |
>     (e ^ f) |
>     g)
>     ctr++;
> 

Apart from the alignment of the increment at the end, yes, I admit it is a
little more readable. However, I also think that it's still pretty complex
even with the helpers!

> Placing the operators at the beginning also makes the code more readable:
> 
> if (a
>     | (b
>        & (c ^ d))
>     | (e ^ f)
>     | g)
>     ctr++;
> 

+1 to this. I think having operators at the beginning of lines is good. It
also makes it visually clearer that the indent is for line continuation.

> I guess that coding styles are mostly a matter of taste.
> 
> I wonder if any research into coding styles has reached any conclusions or 
> recommendations, beyond mixing coding styles is bad for readability.
> 
> > We really should have a style that can be describe by clang format.
> > Other projects like VPP have a target that reformats the code and uses
> > one of the clang format templates.
> 
> Automated code formatting seems like a good idea.
> 

Yep. The trouble is that, if you don't do this from the start, the deltas
will be massive, and confusing in our git history.

/Bruce


Re: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 10:26 AM, Bruce Richardson wrote:
> On Sun, Feb 18, 2024 at 11:05:35AM +0100, Morten Brørup wrote:
>>> From: Stephen Hemminger [mailto:step...@networkplumber.org]
>>> Sent: Saturday, 17 February 2024 19.11
>>>
>>> On Sat, 17 Feb 2024 19:02:30 +0100
>>> Morten Brørup  wrote:
>>>
 Not formally... it follows the official DPDK Coding Style [1].

 [1]:
>>> https://doc.dpdk.org/guides/contributing/coding_style.html#general

> Should be:
>
> 	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
> 	    (ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
> 		goto clean_txd;

 This indentation style is mentioned as an alternative in the guide.
>>> But the example in the guide also uses two tabs for a similar long
>>> comparison.

 Personally, I also prefer the style suggested by Stephen, so we might
>>> want to update the Coding Style. ;-)
>>>
>>>
>>> The two tabs is an Intel thing, and I prefer the kernel, line up the
>>> conditional style.
>>
>> I prefer 4 space indentation, which is sufficient to notice the indentation. 
>> 8 spaces seems overkill to me, and quickly makes the lines too long.
>> With the editor configured to show tab as 4 spaces, the kernel alignment 
>> style ends up with the same indentation for the condition and the code block:
>>
>> if (a &&
>>     b)
>>     ctr++;
>>
>> Whereas with the "tab as 4 spaces" editor configuration, the double 
>> indentation style clearly separates the continued condition from code block:
>>
>> if (a &&
>>         b)
>>     ctr++;
>>
> 
> These two above are the main reasons I much prefer the double indent on
> continuation, though I'd also add a third one: it means we don't have a mix
> of tabs and spaces for indentation.
> 
> However, as stated already indent can be a matter of taste, and there will
> be some disagreement about it. The existing coding standards document what
> was done in the code base when they were written, and I don't think we
> should look to change them. It's a bit annoying having 2 standards for
> continuation rather than 1, but it's not exactly a free-for-all, and it's
> not something that applies to every line, only to a small subset.
> 

++1



Re: [PATCH v2 5/5] vhost: enhance virtqueue access lock asserts

2024-02-19 Thread Thomas Monjalon
05/12/2023 10:45, David Marchand:
> +#define VHOST_USER_ASSERT_LOCK(dev, vq, id) do { \
> + RTE_BUILD_BUG_ON(!vhost_message_handlers[id].lock_all_qps); \
> + vq_assert_lock(dev, vq); \
> +} while (0)

Since "eal: enhance compile-time checks using C11 assert",
it is not allowed to have non-constant check in RTE_BUILD_BUG_ON:

lib/vhost/vhost_user.c:413:25: note: in expansion of macro 'VHOST_USER_ASSERT_LOCK'
lib/vhost/vhost_user.c: In function 'vhost_user_set_vring_addr':
lib/eal/include/rte_common.h:518:56: error: expression in static assertion is not constant
  #define RTE_BUILD_BUG_ON(condition) do { static_assert(!(condition), #condition); } while (0)

I suppose we can make this check at compile time with a few adjustments.
For -rc1, I am dropping this patch.
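To illustrate the constraint: RTE_BUILD_BUG_ON() now maps to a C11
static_assert, so the checked expression must be an integer constant
expression. A hedged sketch (the table and index names are hypothetical;
the macro mirrors the definition quoted above):

#include <assert.h>

#define BUILD_BUG_ON(condition) \
	do { static_assert(!(condition), #condition); } while (0)

static const int handlers[8] = {0};

static void
check(int id)
{
	/* OK: sizeof() yields an integer constant expression. */
	BUILD_BUG_ON(sizeof(handlers) != 8 * sizeof(int));

	/* Would fail to compile: 'handlers[id]' depends on a runtime value.
	 * BUILD_BUG_ON(handlers[id] != 0);
	 */
	(void)id;
}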





RE: [RFC 1/5] eal: add static per-lcore memory allocation facility

2024-02-19 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> Sent: Monday, 19 February 2024 08.49
> 
> On 2024-02-09 14:04, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> >> Sent: Friday, 9 February 2024 12.46
> >>
> >> On 2024-02-09 09:25, Morten Brørup wrote:
>  From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
>  Sent: Thursday, 8 February 2024 19.17
> 
>  Introduce DPDK per-lcore id variables, or lcore variables for
> short.
> 
>  An lcore variable has one value for every current and future lcore
>  id-equipped thread.
> 
>  The primary  use case is for statically
> allocating
>  small chunks of often-used data, which is related logically, but
> >> where
>  there are performance benefits to reap from having updates being
> >> local
>  to an lcore.
> 
>  Lcore variables are similar to thread-local storage (TLS, e.g.,
> C11
>  _Thread_local), but decoupling the values' life time with that of
> >> the
>  threads.
> 
>  Lcore variables are also similar in terms of functionality
> provided
> >> by
>  FreeBSD kernel's DPCPU_*() family of macros and the associated
>  build-time machinery. DPCPU uses linker scripts, which effectively
>  prevents the reuse of its, otherwise seemingly viable, approach.
> 
>  The currently-prevailing way to solve the same problem as lcore
>  variables is to keep a module's per-lcore data as RTE_MAX_LCORE-
> >> sized
>  array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit of
>  lcore variables over this approach is that data related to the
> same
>  lcore now is close (spatially, in memory), rather than data used
> by
>  the same module, which in turn avoid excessive use of padding,
>  polluting caches with unused data.
> 
>  Signed-off-by: Mattias Rönnblom 
>  ---
> >>>
> >>> This looks very promising. :-)
> >>>
> >>> Here's a bunch of comments, questions and suggestions.
> >>>
> >>>
> >>> * Question: Performance.
> >>> What is the cost of accessing an lcore variable vs a variable in
> TLS?
> >>> I suppose the relative cost diminishes if the variable is a larger
> >> struct, compared to a simple uint64_t.
> >>>
> >>
> >> In case all the relevant data is available in a cache close to the
> >> core,
> >> both options carry quite low overhead.
> >>
> >> Accessing a lcore variable will always require a TLS lookup, in the
> >> form
> >> of retrieving the lcore_id of the current thread. In that sense,
> there
> >> will likely be a number of extra instructions required to do the
> lcore
> >> variable address lookup (i.e., doing the load from rte_lcore_var
> table
> >> based on the lcore_id you just looked up, and adding the variable's
> >> offset).
> >>
> >> A TLS lookup will incur an extra overhead of less than a clock
> cycle,
> >> compared to accessing a non-TLS static variable, in case static
> linking
> >> is used. For shared objects, TLS is much more expensive (something
> >> often
> >> visible in dynamically linked DPDK app flame graphs, in the form of
> the
> >> __tls_addr symbol). Then you need to add ~3 cc/access. This on a
> micro
> >> benchmark running on a x86_64 Raptor Lake P-core.
> >>
> >> (To visialize the difference between shared object and not, one can
> use
> >> Compiler Explorer and -fPIC versus -fPIE.)
> >>
> >> Things get more complicated if you access the same variable in the
> same
> >> section code, since then it can be left on the stack/in a register
> by
> >> the compiler, especially if LTO is used. In other words, if you do
> >> rte_lcore_id() several times in a row, only the first one will cost
> you
> >> anything. This happens fairly often in DPDK, with rte_lcore_id().
> >>
> >> Finally, if you do something like
> >>
> >> diff --git a/lib/eal/common/rte_random.c
> b/lib/eal/common/rte_random.c
> >> index af9fffd81b..a65c30d27e 100644
> >> --- a/lib/eal/common/rte_random.c
> >> +++ b/lib/eal/common/rte_random.c
> >> @@ -125,14 +125,7 @@ __rte_rand_lfsr258(struct rte_rand_state
> *state)
> >>static __rte_always_inline
> >>struct rte_rand_state *__rte_rand_get_state(void)
> >>{
> >> -   unsigned int idx;
> >> -
> >> -   idx = rte_lcore_id();
> >> -
> >> -   if (unlikely(idx == LCORE_ID_ANY))
> >> -   return &unregistered_rand_state;
> >> -
> >> -   return RTE_LCORE_VAR_PTR(rand_state);
> >> +   return &unregistered_rand_state;
> >>}
> >>
> >>uint64_t
> >>
> >> ...and re-run the rand_perf_autotest, at least I see no difference
> at
> >> all (in a statically linked build). Both results in rte_rand() using
> >> ~11
> >> cc/call. What that suggests is that TLS overhead is very small, and
> >> that
> >> any extra instructions required by lcore variables doesn't add much,
> if
> >> anything at all, at least in this particular case.
> >
> > Excellent. Thank you for a thorough and detailed answer, Mattias.
> >
> >>
> >>> Some of my sugg

Re: [PATCH v7 1/2] lib/net: fix parsing of VLAN metadata

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 9:31 AM, Alan Elder wrote:
> Add common macros for extracting parts of VLAN tag.
> 
> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alan Elder 
>

Reviewed-by: Ferruh Yigit 


Re: [PATCH v7 2/2] net/netvsc: fix parsing of VLAN metadata

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 9:31 AM, Alan Elder wrote:
> The previous code incorrectly parsed the VLAN ID and priority.
> If the 16-bits of VLAN ID and priority/CFI on the wire was
> 0123456789ABCDEF the code parsed it as 456789ABCDEF3012.  There
> were macros defined to handle this conversion but they were not
> used.
> 
> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alan Elder 
>

Acked-by: Ferruh Yigit 


Re: [PATCH v7 1/2] lib/net: fix parsing of VLAN metadata

2024-02-19 Thread Ferruh Yigit
On 2/19/2024 11:12 AM, Ferruh Yigit wrote:
> On 2/19/2024 9:31 AM, Alan Elder wrote:
>> Add common macros for extracting parts of VLAN tag.
>>
>> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Alan Elder 
>>
> 
> Reviewed-by: Ferruh Yigit 
>

Series applied to dpdk-next-net/main, thanks.


Updated patch title as below while merging:
net: add helper macros for VLAN metadata parsing


Also kept the fixes and stable tags, although the patch itself is not a
fix, to request backporting of the patch and to highlight the reason for
the request.


RE: [RFC v2 3/5] random: keep PRNG state in lcore variable

2024-02-19 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> Sent: Monday, 19 February 2024 10.41
> 
> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
> same state in a more cache-friendly lcore variable.
> 
> Signed-off-by: Mattias Rönnblom 
> ---

[...]

> @@ -19,14 +20,12 @@ struct rte_rand_state {
>   uint64_t z3;
>   uint64_t z4;
>   uint64_t z5;
> - RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
> 
> -/* One instance each for every lcore id-equipped thread, and one
> - * additional instance to be shared by all others threads (i.e., all
> - * unregistered non-EAL threads).
> - */
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
> +
> +/* instance to be shared by all unregistered non-EAL threads */
> +static struct rte_rand_state unregistered_rand_state
> __rte_cache_aligned;

The unregistered_rand_state instance is still __rte_cache_aligned; consider 
also adding an RTE_CACHE_GUARD to it.
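For reference, a minimal sketch of the guard pattern being discussed,
assuming the standard DPDK build environment:

#include <stdint.h>
#include <rte_common.h>

/* RTE_CACHE_GUARD appends cache-line-sized padding so that the
 * hardware next-line prefetcher working on an adjacent instance does
 * not cause false sharing with this one.
 */
struct shared_state {
	uint64_t counter;
	RTE_CACHE_GUARD; /* padding only; no effect on the field above */
} __rte_cache_aligned;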



Re: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Ferruh Yigit
On 2/17/2024 4:47 PM, Stephen Hemminger wrote:
> On Sat, 17 Feb 2024 09:54:10 +0800
> Chaoyong He  wrote:
> 
>> Add the support of UDP fragmentation offload feature.
>>
>> Signed-off-by: Chaoyong He 
>> Reviewed-by: Long Wu 
>> Reviewed-by: Peng Zhang 
>> ---
>>  drivers/common/nfp/nfp_common_ctrl.h | 1 +
>>  drivers/net/nfp/nfd3/nfp_nfd3_dp.c   | 7 ++-
>>  drivers/net/nfp/nfdk/nfp_nfdk_dp.c   | 8 +---
>>  drivers/net/nfp/nfp_net_common.c | 8 ++--
>>  4 files changed, 18 insertions(+), 6 deletions(-)
> Looks like this depends on an earlier patch; it does not apply directly
> to the main branch.
>

It is a drivers/net patch, so it should be on top of next-net, and it
applies cleanly to next-net.
CI also detects the sub-tree correctly and is able to apply and test the
patch.



Re: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Ferruh Yigit
On 2/17/2024 1:54 AM, Chaoyong He wrote:
> Add the support of UDP fragmentation offload feature.
> 
> Signed-off-by: Chaoyong He 
> Reviewed-by: Long Wu 
> Reviewed-by: Peng Zhang 

<...>

> diff --git a/drivers/net/nfp/nfp_net_common.c 
> b/drivers/net/nfp/nfp_net_common.c
> index 72c9a41b00..99c319eb2d 100644
> --- a/drivers/net/nfp/nfp_net_common.c
> +++ b/drivers/net/nfp/nfp_net_common.c
> @@ -312,7 +312,7 @@ nfp_net_log_device_information(const struct nfp_net_hw 
> *hw)
>   hw->ver.major, hw->ver.minor, hw->max_mtu);
>  
>   PMD_INIT_LOG(INFO, "CAP: %#x", cap);
> - PMD_INIT_LOG(INFO, 
> "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
> + PMD_INIT_LOG(INFO, 
> "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
>

This seems to be getting out of hand. I assume it is done like this
(instead of multiple print lines) to prevent line breaks; if so, what
do you think about adding a new macro that doesn't append \n
automatically and refactoring this code (in a separate patch), as
sketched below?
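A minimal sketch of such a macro; the PMD_INIT_LOG_NONL name and the
logtype symbol are assumptions for illustration, not the driver's
actual definitions:

/* Hypothetical variant of the init-log macro that does not append a
 * newline, so the long capability string could be emitted piece by
 * piece instead of in one giant format string, followed by a final
 * "\n".
 */
#define PMD_INIT_LOG_NONL(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, nfp_logtype_init, fmt, ## __VA_ARGS__)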



Re: [RFC v2 3/5] random: keep PRNG state in lcore variable

2024-02-19 Thread Mattias Rönnblom

On 2024-02-19 12:22, Morten Brørup wrote:

From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
Sent: Monday, 19 February 2024 10.41

Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
same state in a more cache-friendly lcore variable.

Signed-off-by: Mattias Rönnblom 
---


[...]


@@ -19,14 +20,12 @@ struct rte_rand_state {
uint64_t z3;
uint64_t z4;
uint64_t z5;
-   RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};

-/* One instance each for every lcore id-equipped thread, and one
- * additional instance to be shared by all others threads (i.e., all
- * unregistered non-EAL threads).
- */
-static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
+RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
+
+/* instance to be shared by all unregistered non-EAL threads */
+static struct rte_rand_state unregistered_rand_state
__rte_cache_aligned;


The unregistered_rand_state instance is still __rte_cache_aligned; consider 
also adding an RTE_CACHE_GUARD to it.



It shouldn't be cache-line aligned. I'll remove it. Thanks.


Re: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Ferruh Yigit
On 2/17/2024 1:54 AM, Chaoyong He wrote:
> Add the support of UDP fragmentation offload feature.
> 
> Signed-off-by: Chaoyong He 
> Reviewed-by: Long Wu 
> Reviewed-by: Peng Zhang 
>

Applied to dpdk-next-net/main, thanks.


Re: [RFC 1/5] eal: add static per-lcore memory allocation facility

2024-02-19 Thread Mattias Rönnblom

On 2024-02-19 12:10, Morten Brørup wrote:

From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
Sent: Monday, 19 February 2024 08.49

On 2024-02-09 14:04, Morten Brørup wrote:

From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
Sent: Friday, 9 February 2024 12.46

On 2024-02-09 09:25, Morten Brørup wrote:

From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
Sent: Thursday, 8 February 2024 19.17

Introduce DPDK per-lcore id variables, or lcore variables for short.

An lcore variable has one value for every current and future lcore
id-equipped thread.

The primary use case is for statically allocating small chunks of
often-used data, which is related logically, but where there are
performance benefits to reap from having updates being local to an
lcore.

Lcore variables are similar to thread-local storage (TLS, e.g., C11
_Thread_local), but decouple the values' lifetime from that of the
threads.

Lcore variables are also similar in terms of functionality provided by
the FreeBSD kernel's DPCPU_*() family of macros and the associated
build-time machinery. DPCPU uses linker scripts, which effectively
prevents the reuse of its, otherwise seemingly viable, approach.

The currently-prevailing way to solve the same problem as lcore
variables is to keep a module's per-lcore data as an RTE_MAX_LCORE-
sized array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit
of lcore variables over this approach is that data related to the same
lcore now is close (spatially, in memory), rather than data used by
the same module, which in turn avoids excessive use of padding,
polluting caches with unused data.

Signed-off-by: Mattias Rönnblom 
---


This looks very promising. :-)

Here's a bunch of comments, questions and suggestions.


* Question: Performance.
What is the cost of accessing an lcore variable vs a variable in TLS?
I suppose the relative cost diminishes if the variable is a larger
struct, compared to a simple uint64_t.




In case all the relevant data is available in a cache close to the
core, both options carry quite low overhead.

Accessing a lcore variable will always require a TLS lookup, in the
form of retrieving the lcore_id of the current thread. In that sense,
there will likely be a number of extra instructions required to do the
lcore variable address lookup (i.e., doing the load from the
rte_lcore_var table based on the lcore_id you just looked up, and
adding the variable's offset).

A TLS lookup will incur an extra overhead of less than a clock cycle,
compared to accessing a non-TLS static variable, in case static
linking is used. For shared objects, TLS is much more expensive
(something often visible in dynamically linked DPDK app flame graphs,
in the form of the __tls_addr symbol). Then you need to add ~3
cc/access. This is on a micro benchmark running on a x86_64 Raptor
Lake P-core.

(To visualize the difference between shared object and not, one can
use Compiler Explorer and -fPIC versus -fPIE.)

Things get more complicated if you access the same variable in the
same section of code, since then it can be left on the stack/in a
register by the compiler, especially if LTO is used. In other words,
if you do rte_lcore_id() several times in a row, only the first one
will cost you anything. This happens fairly often in DPDK, with
rte_lcore_id().

Finally, if you do something like

diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index af9fffd81b..a65c30d27e 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -125,14 +125,7 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
static __rte_always_inline
struct rte_rand_state *__rte_rand_get_state(void)
{
-   unsigned int idx;
-
-   idx = rte_lcore_id();
-
-   if (unlikely(idx == LCORE_ID_ANY))
-   return &unregistered_rand_state;
-
-   return RTE_LCORE_VAR_PTR(rand_state);
+   return &unregistered_rand_state;
}

uint64_t

...and re-run the rand_perf_autotest, at least I see no difference at
all (in a statically linked build). Both result in rte_rand() using
~11 cc/call. What that suggests is that TLS overhead is very small,
and that any extra instructions required by lcore variables don't add
much, if anything at all, at least in this particular case.


Excellent. Thank you for a thorough and detailed answer, Mattias.


Some of my suggestions below might also affect performance.

* Advantage: Provides direct access to worker thread variables.
With the current alternative (thread-local storage), the main thread
cannot access the TLS variables of the worker threads, unless worker
threads publish global access pointers.
Lcore variables of any lcore thread can be directly accessed by any
thread, which simplifies code.

* Advantage: Roadmap towards hugemem.
It would be nice if the lcore variable memory was allocated in hugemem,
to reduce TLB misses.

The current alternative (thread-local stor
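To make the access pattern under discussion concrete, a minimal sketch
using the RTE_LCORE_VAR_* names that appear in the patch;
RTE_LCORE_VAR_ALLOC and the header name are assumed from the same RFC,
and the final API may differ:

#include <stdint.h>
#include <rte_lcore_var.h> /* assumed header name from the RFC */

struct my_stats {
	uint64_t pkts;
	uint64_t bytes;
};

RTE_LCORE_VAR_HANDLE(struct my_stats, stats);

static void
my_module_init(void)
{
	RTE_LCORE_VAR_ALLOC(stats); /* one instance per lcore id */
}

static inline void
my_module_count(uint64_t n, uint64_t len)
{
	/* resolves to the calling thread's instance via rte_lcore_id() */
	struct my_stats *s = RTE_LCORE_VAR_PTR(stats);

	s->pkts += n;
	s->bytes += len;
}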

RE: [RFC 1/5] eal: add static per-lcore memory allocation facility

2024-02-19 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> Sent: Monday, 19 February 2024 15.32
> 
> On 2024-02-19 12:10, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> >> Sent: Monday, 19 February 2024 08.49
> >>
> >> On 2024-02-09 14:04, Morten Brørup wrote:
>  From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
>  Sent: Friday, 9 February 2024 12.46
> 
>  On 2024-02-09 09:25, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> >> Sent: Thursday, 8 February 2024 19.17
> >>
> >> Introduce DPDK per-lcore id variables, or lcore variables for
> >> short.
> >>
> >> An lcore variable has one value for every current and future
> lcore
> >> id-equipped thread.
> >>
> >> The primary  use case is for statically
> >> allocating
> >> small chunks of often-used data, which is related logically, but
>  where
> >> there are performance benefits to reap from having updates being
>  local
> >> to an lcore.
> >>
> >> Lcore variables are similar to thread-local storage (TLS, e.g.,
> >> C11
> >> _Thread_local), but decoupling the values' life time with that
> of
>  the
> >> threads.
> >>
> >> Lcore variables are also similar in terms of functionality
> >> provided
>  by
> >> FreeBSD kernel's DPCPU_*() family of macros and the associated
> >> build-time machinery. DPCPU uses linker scripts, which
> effectively
> >> prevents the reuse of its, otherwise seemingly viable, approach.
> >>
> >> The currently-prevailing way to solve the same problem as lcore
> >> variables is to keep a module's per-lcore data as RTE_MAX_LCORE-
>  sized
> >> array of cache-aligned, RTE_CACHE_GUARDed structs. The benefit
> of
> >> lcore variables over this approach is that data related to the
> >> same
> >> lcore now is close (spatially, in memory), rather than data used
> >> by
> >> the same module, which in turn avoid excessive use of padding,
> >> polluting caches with unused data.
> >>
> >> Signed-off-by: Mattias Rönnblom 
> >> ---

[...]

> > Ups... wrong reference! I meant to refer to _lcore_id, not
> _thread_id. Correction:
> >
> 
> OK. I subconsciously ignored this mistake, and read it as "_lcore_id".

:-)

[...]

> >> For DPDK modules using lcore variables and which treat unregistered
> >> threads as "full citizens", I expect special handling of
> unregistered
> >> threads to be the norm. Take rte_random.h as an example. Current API
> >> does not guarantee MT safety for concurrent calls of unregistered
> >> threads. It probably should, and it should probably be by means of a
> >> mutex (not spinlock).
> >>
> >> The reason I'm not running off to make a rte_random.c patch is
> that's
> >> it's unclear to me what is the role of unregistered threads in DPDK.
> >> I'm
> >> reasonably comfortable with a model where there are many threads
> that
> >> basically don't interact with the DPDK APIs (except maybe some very
> >> narrow exposure, like the preemption-safe ring variant). One example
> of
> >> such a design would be big slow control plane which uses multi-
> >> threading
> >> and the Linux process scheduler for work scheduling, hosted in the
> same
> >> process as a DPDK data plane app.
> >>
> >> What I find more strange is a scenario where there are unregistered
> >> threads which interacts with a wide variety of DPDK APIs, does so
> >> at-high-rates/with-high-performance-requirements and are expected to
> be
> >> preemption-safe. So they are basically EAL threads without a lcore
> id.
> >
> > Yes, this is happening in the wild.
> > E.g. our application has a mode where it uses fewer EAL threads, and
> processes more in non-EAL threads. So to say, the same work is
> processed either by an EAL thread or a non-EAL thread, depending on the
> application's mode.
> > The extra array entry would be useful for such use cases.
> >
> 
> Is there some particular reason you can't register those non-EAL
> threads?

Legacy. I suppose we could just do that instead.
Thanks for the suggestion!
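For reference, a minimal sketch of the registration path suggested
above, using the existing EAL thread registration API:

#include <stddef.h>
#include <rte_lcore.h>

/* A non-EAL thread can claim a free lcore id at runtime; afterwards
 * rte_lcore_id() returns a real id instead of LCORE_ID_ANY, so
 * per-lcore facilities (and the proposed lcore variables) apply.
 */
static void *
worker(void *arg)
{
	(void)arg;

	if (rte_thread_register() < 0)
		return NULL; /* no free lcore id available */

	/* ... do work using per-lcore data ... */

	rte_thread_unregister();
	return NULL;
}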

> 
> >>
> >> Support for that latter scenario has also been voiced, in previous
> >> discussions, from what I recall.
> >>
> >> I think it's hard to answer the question of a "unregistered thread
> >> spare" for lcore variables without first knowing what the future
> should
> >> look like for unregistered threads in DPDK, in terms of being able
> to
> >> call into DPDK APIs, preemption-safety guarantees, etc.
> >>
> >> It seems that until you have a clearer picture of how generally to
> >> treat
> >> unregistered threads, you are best off with just a per-lcore id
> >> instance
> >> of lcore variables.
> >
> > I get your point. It also reduces the risk of bugs caused by
> incorrect use of the additional entry.
> >
> > I am arguing for a different angle: Providing the extra entry will
> > help uncover relevant use cases.
> >
> 
> Maybe

RE: [RFC v2 3/5] random: keep PRNG state in lcore variable

2024-02-19 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> Sent: Monday, 19 February 2024 15.04
> 
> On 2024-02-19 12:22, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> >> Sent: Monday, 19 February 2024 10.41
> >>
> >> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
> >> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping
> the
> >> same state in a more cache-friendly lcore variable.
> >>
> >> Signed-off-by: Mattias Rönnblom 
> >> ---
> >
> > [...]
> >
> >> @@ -19,14 +20,12 @@ struct rte_rand_state {
> >>uint64_t z3;
> >>uint64_t z4;
> >>uint64_t z5;
> >> -  RTE_CACHE_GUARD;
> >> -} __rte_cache_aligned;
> >> +};
> >>
> >> -/* One instance each for every lcore id-equipped thread, and one
> >> - * additional instance to be shared by all others threads (i.e.,
> all
> >> - * unregistered non-EAL threads).
> >> - */
> >> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> >> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
> >> +
> >> +/* instance to be shared by all unregistered non-EAL threads */
> >> +static struct rte_rand_state unregistered_rand_state
> >> __rte_cache_aligned;
> >
> > The unregistered_rand_state instance is still __rte_cache_aligned;
> consider also adding an RTE_CACHE_GUARD to it.
> >
> 
> It shouldn't be cache-line aligned. I'll remove it. Thanks.

Agreed; that fix is just as good. Then,

Acked-by: Morten Brørup 



Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications

2024-02-19 Thread Ferruh Yigit
On 2/16/2024 5:07 PM, Andrew Boyer wrote:
> Add support for running DPDK applications directly on AMD Pensando
> embedded HW. The platform exposes the device BARs through UIO. The
> UIO code in the common/ionic library walks the sysfs filesystem
> to identify the relevant BARs and map them into process memory.
> 
> The SoCs are named 'Capri' and 'Elba'.
> 
> The vdev device interface code is located in ionic_dev_vdev.c.
> 
> Some datapath operations are #ifdef-ed out to save on resources when
> running in embedded mode.
> 
> Some controlpath operations are skipped by the ionic_is_embedded()
> helper function.
> 
> Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal
> barrier inside rte_write64() is insufficient on these devices due to
> a chip errata.
> 
> Signed-off-by: Andrew Boyer 
> Signed-off-by: Neel Patel 
> Signed-off-by: R Mohamed Shah 
> Signed-off-by: Alfredo Cardigliano 

<...>

> +static struct rte_vdev_driver rte_vdev_ionic_pmd = {
> + .probe = eth_ionic_vdev_probe,
> + .remove = eth_ionic_vdev_remove,
> +};
> +
> +RTE_PMD_REGISTER_VDEV(net_ionic, rte_vdev_ionic_pmd);
> +
> +static void
> +vdev_ionic_scan_cb(__rte_unused void *arg)
> +{
> + ionic_uio_scan_mnet_devices();
> +}
> +
> +RTE_INIT(vdev_ionic_custom_add)
> +{
> + rte_vdev_add_custom_scan(vdev_ionic_scan_cb, NULL);
> +}

Hi Andrew,

My understanding is that 'rte_vdev_add_custom_scan()' is meant to add a
vdev automatically (via rte_devargs_add()) before the vdev scan starts.

As far as I can see you are not doing this, so why is the callback
added? Why not call 'ionic_uio_scan_mnet_devices()' within
'eth_ionic_vdev_probe()'?


Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.

2024-02-19 Thread Stephen Hemminger
On Mon, 19 Feb 2024 02:44:35 +
Rushil Gupta  wrote:

> This was causing failure for testpmd runs (for queues >=15)
> presumably due to flooding of logs due to descriptor ring being
> overwritten.
> 
> Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Rushil Gupta 
> Reviewed-by: Joshua Washington 

Isn't this still an error? What about the overwritten descriptor? Is
there an mbuf leak?
Maybe a statistic would be better than a message?


Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash

2024-02-19 Thread Medvedkin, Vladimir

Hi Abdullah,

Could you please tell more about use cases where this API may be useful?


a new API to get the hidden key count in the hash table if the rcu qsbr is 
enabled


Here in the commit message and down below in the doxygen comments, I
think this statement should be more specific, because the RCU can be
created in RTE_HASH_QSBR_MODE_SYNC mode, i.e. without a defer queue.


Also, the new API must be reflected in the release notes.

On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote:

This patch introduces a new API to get the hidden key count in the hash
table if the rcu qsbr is enabled. When using rte_hash_count with rcu
qsbr enabled, it will return the number of elements that are not in the
free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of
elements in the defer queue will not be counted and freed. Therefore I
added a new API to get the number of hidden (defer queue) elements
in the hash table. Then the user can calculate the total number of
elements that are available in the hash table.

Signed-off-by: Abdullah Ömer Yamaç 

---
Cc: Honnappa Nagarahalli 
Cc: Yipeng Wang 
Cc: Sameh Gobriel 
Cc: Bruce Richardson 
Cc: Vladimir Medvedkin 
---
  lib/hash/rte_cuckoo_hash.c |  9 +
  lib/hash/rte_hash.h| 13 +
  lib/hash/version.map   |  1 +
  lib/rcu/rte_rcu_qsbr.c |  8 
  lib/rcu/rte_rcu_qsbr.h | 11 +++
  lib/rcu/version.map|  1 +
  6 files changed, 43 insertions(+)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 70456754c4..3553f3efc7 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h)
return h->entries;
  }
  
+int32_t

+rte_hash_dq_count(const struct rte_hash *h)
+{
+   if (h->dq == NULL)
Input arguments must be checked since this is a public API; the same is
true for rte_rcu_qsbr_dq_count().

+   return -EINVAL;

why not just return 0?

+
+   return rte_rcu_qsbr_dq_count(h->dq);
+}
+
  int32_t
  rte_hash_count(const struct rte_hash *h)
  {
diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h
index 7ecc02..8ea97e297d 100644
--- a/lib/hash/rte_hash.h
+++ b/lib/hash/rte_hash.h
@@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h);
  void
  rte_hash_reset(struct rte_hash *h);
  
+

+/**
+ * Return the number of records in the defer queue of the hash table
+ * if RCU is enabled.
+ * @param h
+ *  Hash table to query from
+ * @return
+ *   - -EINVAL if parameters are invalid
+ *   - A value indicating how many records were inserted in the table.

Did you mean how many records are kept in the defer queue?

+ */
+int32_t
+rte_hash_dq_count(const struct rte_hash *h);
+
  /**
   * Return the number of keys in the hash table
   * @param h
diff --git a/lib/hash/version.map b/lib/hash/version.map
index 6b2afebf6b..7f7b158cf1 100644
--- a/lib/hash/version.map
+++ b/lib/hash/version.map
@@ -9,6 +9,7 @@ DPDK_24 {
rte_hash_add_key_with_hash;
rte_hash_add_key_with_hash_data;
rte_hash_count;
+   rte_hash_dq_count;
A new API must be introduced as an experimental API. The same is true
for rte_rcu_qsbr_dq_count().

rte_hash_crc32_alg;
rte_hash_crc_set_alg;
rte_hash_create;
diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index bd0b83be0c..89f8da4c4c 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -450,6 +450,14 @@ rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, 
unsigned int n,
return 0;
  }
  
+/**

+ * Return the number of entries in a defer queue.
+ */
+unsigned int rte_rcu_qsbr_dq_count(struct rte_rcu_qsbr_dq *dq)
+{
+   return rte_ring_count(dq->r);
+}
+
  /* Delete a defer queue. */
  int
  rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 23c9f89805..ed5a590edd 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -794,6 +794,17 @@ int
  rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,
unsigned int *freed, unsigned int *pending, unsigned int *available);
  
+/**

+ * Return the number of entries in a defer queue.
+ *
+ * @param dq
+ *   Defer queue.
+ * @return
+ *   The number of entries in the defer queue.
+ */
+unsigned int
+rte_rcu_qsbr_dq_count(struct rte_rcu_qsbr_dq *dq);
+
  /**
   * Delete a defer queue.
   *
diff --git a/lib/rcu/version.map b/lib/rcu/version.map
index 982ffd59d9..f410ab41e7 100644
--- a/lib/rcu/version.map
+++ b/lib/rcu/version.map
@@ -5,6 +5,7 @@ DPDK_24 {
rte_rcu_qsbr_dq_create;
rte_rcu_qsbr_dq_delete;
rte_rcu_qsbr_dq_enqueue;
+   rte_rcu_qsbr_dq_count;
rte_rcu_qsbr_dq_reclaim;
rte_rcu_qsbr_dump;
rte_rcu_qsbr_get_memsize;


--
Regards,
Vladimir



Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash

2024-02-19 Thread Abdullah Ömer Yamaç
Hello,

Let me explain a use case:

I have a hash table whose keys are IP addresses and whose data is
related to the IP address (let's say the username for that IP). The key
point is matching these data with flows. Flows are dynamic, and this
hash table is dynamic as well; both can change at any time. For
example, when a flow starts, we look up the hash table with the
corresponding IP and retrieve the username. We need to hold this
username until the flow terminates, even though we may have removed
this IP key from the hash table (multithreaded). That's why we have
RCU, and the defer queue is necessary for high performance. In my
application, I need to know the number of IP-username entries. This
number can be calculated as rte_hash_count() minus the defer queue
size, as sketched below.

I think if you need a non-blocking, multithreaded hash table, an
RCU-enabled hash table is necessary. Also, this API is necessary if you
need to get the actual matchable size.
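A sketch of that calculation, assuming the rte_hash_dq_count() API
proposed in this patch:

#include <rte_hash.h>

/* Entries sitting in the RCU defer queue are already deleted from the
 * table (not matchable) but are still counted by rte_hash_count(),
 * which reports everything not on the free list.
 */
static int32_t
matchable_entries(const struct rte_hash *h)
{
	int32_t dq = rte_hash_dq_count(h); /* proposed API */

	if (dq < 0) /* e.g. -EINVAL: no defer queue configured */
		dq = 0;

	return rte_hash_count(h) - dq;
}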





On Mon, Feb 19, 2024 at 8:36 PM Medvedkin, Vladimir <
vladimir.medved...@intel.com> wrote:

> Hi Abdullah,
>
> Could you please tell more about use cases where this API may be useful?
>
> >a new API to get the hidden key count in the hash table if the rcu qsbr
> is enabled
>
> Here in commit message and down below in doxygen comments, I think this
> statement should be more specific because rcu can be created with
> RTE_HASH_QSBR_MODE_SYNC mode i.e. without defer queue.
>
> Also, new API must be reflected in release notes
>
> On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote:
> > This patch introduces a new API to get the hidden key count in the hash
> > table if the rcu qsbr is enabled. When using rte_hash_count with rcu
> > qsbr enabled, it will return the number of elements that are not in the
> > free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of
> > elements in the defer queue will not be counted and freed. Therefore I
> > added a new API to get the number of hidden (defer queue) elements
> > in the hash table. Then the user can calculate the total number of
> > elements that are available in the hash table.
> >
> > Signed-off-by: Abdullah Ömer Yamaç 
> >
> > ---
> > Cc: Honnappa Nagarahalli 
> > Cc: Yipeng Wang 
> > Cc: Sameh Gobriel 
> > Cc: Bruce Richardson 
> > Cc: Vladimir Medvedkin 
> > ---
> >   lib/hash/rte_cuckoo_hash.c |  9 +
> >   lib/hash/rte_hash.h| 13 +
> >   lib/hash/version.map   |  1 +
> >   lib/rcu/rte_rcu_qsbr.c |  8 
> >   lib/rcu/rte_rcu_qsbr.h | 11 +++
> >   lib/rcu/version.map|  1 +
> >   6 files changed, 43 insertions(+)
> >
> > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> > index 70456754c4..3553f3efc7 100644
> > --- a/lib/hash/rte_cuckoo_hash.c
> > +++ b/lib/hash/rte_cuckoo_hash.c
> > @@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h)
> >   return h->entries;
> >   }
> >
> > +int32_t
> > +rte_hash_dq_count(const struct rte_hash *h)
> > +{
> > + if (h->dq == NULL)
> input arguments must be checked since this is a public API, the same is
> true for rte_rcu_qsbr_dq_count()
> > + return -EINVAL;
> why not just return 0?
> > +
> > + return rte_rcu_qsbr_dq_count(h->dq);
> > +}
> > +
> >   int32_t
> >   rte_hash_count(const struct rte_hash *h)
> >   {
> > diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h
> > index 7ecc02..8ea97e297d 100644
> > --- a/lib/hash/rte_hash.h
> > +++ b/lib/hash/rte_hash.h
> > @@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h);
> >   void
> >   rte_hash_reset(struct rte_hash *h);
> >
> > +
> > +/**
> > + * Return the number of records in the defer queue of the hash table
> > + * if RCU is enabled.
> > + * @param h
> > + *  Hash table to query from
> > + * @return
> > + *   - -EINVAL if parameters are invalid
> > + *   - A value indicating how many records were inserted in the table.
> did you mean how many records are kept in defer queue?
> > + */
> > +int32_t
> > +rte_hash_dq_count(const struct rte_hash *h);
> > +
> >   /**
> >* Return the number of keys in the hash table
> >* @param h
> > diff --git a/lib/hash/version.map b/lib/hash/version.map
> > index 6b2afebf6b..7f7b158cf1 100644
> > --- a/lib/hash/version.map
> > +++ b/lib/hash/version.map
> > @@ -9,6 +9,7 @@ DPDK_24 {
> >   rte_hash_add_key_with_hash;
> >   rte_hash_add_key_with_hash_data;
> >   rte_hash_count;
> > + rte_hash_dq_count;
> new API must introduced as an experimental API. The same is true for
> rte_rcu_qsbr_dq_count()
> >   rte_hash_crc32_alg;
> >   rte_hash_crc_set_alg;
> >   rte_hash_create;
> > diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
> > index bd0b83be0c..89f8da4c4c 100644
> > --- a/lib/rcu/rte_rcu_qsbr.c
> > +++ b/lib/rcu/rte_rcu_qsbr.c
> > @@ -450,6 +450,14 @@ rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq,
> unsigned int n,
> >   return 0;
> >   }
> >
> > +/**
> > + * Return the number of entries in a defer queue.
> > + */
> > +unsigned int rt

Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications

2024-02-19 Thread Boyer, Andrew


On Feb 19, 2024, at 10:24 AM, Yigit, Ferruh  wrote:

On 2/16/2024 5:07 PM, Andrew Boyer wrote:
Add support for running DPDK applications directly on AMD Pensando
embedded HW. The platform exposes the device BARs through UIO. The
UIO code in the common/ionic library walks the sysfs filesystem
to identify the relevant BARs and map them into process memory.

The SoCs are named 'Capri' and 'Elba'.

The vdev device interface code is located in ionic_dev_vdev.c.

Some datapath operations are #ifdef-ed out to save on resources when
running in embedded mode.

Some controlpath operations are skipped by the ionic_is_embedded()
helper function.

Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal
barrier inside rte_write64() is insufficient on these devices due to
a chip errata.

Signed-off-by: Andrew Boyer 
Signed-off-by: Neel Patel 
Signed-off-by: R Mohamed Shah 
Signed-off-by: Alfredo Cardigliano 

<...>

+static struct rte_vdev_driver rte_vdev_ionic_pmd = {
+ .probe = eth_ionic_vdev_probe,
+ .remove = eth_ionic_vdev_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_ionic, rte_vdev_ionic_pmd);
+
+static void
+vdev_ionic_scan_cb(__rte_unused void *arg)
+{
+ ionic_uio_scan_mnet_devices();
+}
+
+RTE_INIT(vdev_ionic_custom_add)
+{
+ rte_vdev_add_custom_scan(vdev_ionic_scan_cb, NULL);
+}

Hi Andrew,

My understanding is that 'rte_vdev_add_custom_scan()' is meant to add a
vdev automatically (via rte_devargs_add()) before the vdev scan starts.

As far as I can see you are not doing this, so why is the callback
added? Why not call 'ionic_uio_scan_mnet_devices()' within
'eth_ionic_vdev_probe()'?

I think you are correct and our scan should be restricted to the vdev_probe 
function. Otherwise it will run in all cases, even on irrelevant hardware.

Thanks!
Andrew



RE: [PATCH 2/3] net/ionic: remove duplicate barriers

2024-02-19 Thread Wathsala Wathawana Vithanage
Hi Andrew,

I think that this barrier may have been added to ensure any writes to
q->hw_index and q->head_idx complete before ionic_q_flush() computes
val. Dependency chains can also prevent reordering; if that's the case,
this barrier is not required. However, I have the following concern.

> diff --git a/drivers/net/ionic/ionic_main.c b/drivers/net/ionic/ionic_main.c
> index 1f24f64a33..814bb3b8f4 100644
> --- a/drivers/net/ionic/ionic_main.c
> +++ b/drivers/net/ionic/ionic_main.c
> @@ -223,7 +223,6 @@ ionic_adminq_post(struct ionic_lif *lif, struct
> ionic_admin_ctx *ctx)
>   q->head_idx = Q_NEXT_TO_POST(q, 1);
> 
>   /* Ring doorbell */
> - rte_wmb();
>   ionic_q_flush(q);
> 
>  err_out:

ionic_q_flush(q) uses q->hw_index and q->head_idx to compute the value
of val, which it writes to the address stored in q->db. I can see that
q->head_idx is updated before the removed rte_wmb(); therefore it's
safe to assume that "q->head_idx = Q_NEXT_TO_POST(q, 1)" and
"val = IONIC_DBELL_QID(q->hw_index) | q->head_idx" will maintain
program order due to that dependency. But I don't know if there exists
a dependency chain over q->hw_index. If not, then that may have been
the motive behind this barrier.

> diff --git a/drivers/net/ionic/ionic_rxtx_sg.c
> b/drivers/net/ionic/ionic_rxtx_sg.c
> index e8dec99c04..3820fd850f 100644
> --- a/drivers/net/ionic/ionic_rxtx_sg.c
> +++ b/drivers/net/ionic/ionic_rxtx_sg.c
> @@ -218,7 +218,6 @@ ionic_xmit_pkts_sg(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>   }
> 
>   if (nb_tx > 0) {
> - rte_wmb();
>   ionic_txq_flush(q);
> 
>   txq->last_wdog_cycles = rte_get_timer_cycles(); diff --git
> a/drivers/net/ionic/ionic_rxtx_simple.c
> b/drivers/net/ionic/ionic_rxtx_simple.c
> index 9674b4d1df..4c9b9415ad 100644
> --- a/drivers/net/ionic/ionic_rxtx_simple.c
> +++ b/drivers/net/ionic/ionic_rxtx_simple.c
> @@ -191,7 +191,6 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>   }
> 
>   if (nb_tx > 0) {
> - rte_wmb();
>   ionic_txq_flush(q);
> 
>   txq->last_wdog_cycles = rte_get_timer_cycles();
> --
> 2.17.1


Re: [V1 0/5] support VXLAN-GPE header fields(flags, rsvd0 and rsvd1) matching

2024-02-19 Thread Thomas Monjalon
19/02/2024 20:50, Stephen Hemminger:
> On Fri, 12 Jan 2024 10:02:05 +0200
> Gavin Li  wrote:
> 
> > Previously, VXLAN-GPE in DPDK only supports VNI and next protocol header
> > fields. This patch series add support for flags and reserved field 0 and
> > 1.
> > 
> > Below is the VXLAN-GPE header defined in the lasted draft.
> > 0   1   2   3
> > 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
> >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> >|R|R|Ver|I|P|B|O|   Reserved|Next Protocol  |
> >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> >|VXLAN Network Identifier (VNI) |   Reserved|
> >+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> 
> Would recommend against implementing anything in a draft RFC.
> Things can change.  Learned the hard way when doing VXLAN driver for Linux.
> The hardcoded port value in the Linux VXLAN driver is wrong because it matched
> the draft RFC (got changed in final version). Because of strict compatibility
> requirements the Linux driver could not be changed to the correct value.

The problem is that standardization may be slow.
Would it be acceptable without any compatibility guarantee?
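For reference, a sketch of the header from the draft layout quoted
above; the field names are illustrative (DPDK's own definition lives in
lib/net/rte_vxlan.h):

#include <stdint.h>

/* VXLAN-GPE header per the draft layout quoted above. */
struct vxlan_gpe_hdr_sketch {
	uint8_t flags;    /* R|R|Ver(2)|I|P|B|O */
	uint8_t rsvd0[2]; /* reserved field 0 */
	uint8_t proto;    /* next protocol */
	uint8_t vni[3];   /* VXLAN network identifier */
	uint8_t rsvd1;    /* reserved field 1 */
};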




Re: [PATCH] lib/hash,lib/rcu: feature hidden key count in hash

2024-02-19 Thread Honnappa Nagarahalli


> On Feb 19, 2024, at 3:28 PM, Abdullah Ömer Yamaç  wrote:
> 
> Hello,
> 
> Let me explain a use case;
> 
> I have a hash table whose key value is IP addresses, and data (let's say the 
> username of the IP) is related to the IP address. The key point is matching 
> these data with flows. Flows are dynamic, and this hash table is dynamic, as 
> well; both can change anytime. For example, when a flow starts, we look up 
> the hash table with the corresponding IP and retrieve the username. We need 
> to hold this username until the flow terminates, although we removed this IP 
> key from the hash table (multithread). That's why we have RCU and defer queue 
> is necessary for high performance. In my application, I need to know the 
> number of IP-username entries. These numbers can be calculated by 
> rte_hash_count - defer queue size.
The entries in the defer queue may not have been reclaimed yet (though
there is a chance that all of them have been), and hence they are not
available for allocation. So, rte_hash_count() minus the defer queue
size might not give you the number you are expecting.

Currently, there is no API in the hash library that forces a reclaim.
Does it make sense to have an API that just does the reclaim (and
returns the number of entries pending in the defer queue)? A call to
rte_hash_count() would then provide the exact count you are looking for.
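A sketch of that reclaim-then-count idea, reusing the
rte_rcu_qsbr_dq_reclaim() signature quoted earlier in the thread; it
assumes the application kept its own pointer to the defer queue, which
the hash library does not expose today:

#include <rte_hash.h>
#include <rte_rcu_qsbr.h>

static int32_t
exact_entry_count(const struct rte_hash *h, struct rte_rcu_qsbr_dq *dq)
{
	unsigned int freed, pending, available;

	/* Reclaim up to 64 entries whose grace period has elapsed. */
	rte_rcu_qsbr_dq_reclaim(dq, 64, &freed, &pending, &available);

	/* Entries still pending in the defer queue remain hidden. */
	return rte_hash_count(h) - (int32_t)pending;
}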

> 
> I think if you need a non-blocking and multithreaded hash table, an 
> RCU-enabled hash table is necessary. Also, this API is necessary if you need 
> to get the actual matchable size.
> 
> 
> 
> 
> 
> On Mon, Feb 19, 2024 at 8:36 PM Medvedkin, Vladimir 
>  wrote:
> Hi Abdullah,
> 
> Could you please tell more about use cases where this API may be useful?
> 
> >a new API to get the hidden key count in the hash table if the rcu qsbr is 
> >enabled
> 
> Here in commit message and down below in doxygen comments, I think this 
> statement should be more specific because rcu can be created with 
> RTE_HASH_QSBR_MODE_SYNC mode i.e. without defer queue.
> 
> Also, new API must be reflected in release notes
> 
> On 07/02/2024 15:33, Abdullah Ömer Yamaç wrote:
> > This patch introduces a new API to get the hidden key count in the hash
> > table if the rcu qsbr is enabled. When using rte_hash_count with rcu
> > qsbr enabled, it will return the number of elements that are not in the
> > free queue. Unless rte_rcu_qsbr_dq_reclaim is called, the number of
> > elements in the defer queue will not be counted and freed. Therefore I
> > added a new API to get the number of hidden (defer queue) elements
> > in the hash table. Then the user can calculate the total number of
> > elements that are available in the hash table.
> >
> > Signed-off-by: Abdullah Ömer Yamaç 
> >
> > ---
> > Cc: Honnappa Nagarahalli 
> > Cc: Yipeng Wang 
> > Cc: Sameh Gobriel 
> > Cc: Bruce Richardson 
> > Cc: Vladimir Medvedkin 
> > ---
> >   lib/hash/rte_cuckoo_hash.c |  9 +
> >   lib/hash/rte_hash.h| 13 +
> >   lib/hash/version.map   |  1 +
> >   lib/rcu/rte_rcu_qsbr.c |  8 
> >   lib/rcu/rte_rcu_qsbr.h | 11 +++
> >   lib/rcu/version.map|  1 +
> >   6 files changed, 43 insertions(+)
> >
> > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> > index 70456754c4..3553f3efc7 100644
> > --- a/lib/hash/rte_cuckoo_hash.c
> > +++ b/lib/hash/rte_cuckoo_hash.c
> > @@ -555,6 +555,15 @@ rte_hash_max_key_id(const struct rte_hash *h)
> >   return h->entries;
> >   }
> >   
> > +int32_t
> > +rte_hash_dq_count(const struct rte_hash *h)
> > +{
> > + if (h->dq == NULL)
> input arguments must be checked since this is a public API, the same is 
> true for rte_rcu_qsbr_dq_count()
> > + return -EINVAL;
> why not just return 0?
> > +
> > + return rte_rcu_qsbr_dq_count(h->dq);
> > +}
> > +
> >   int32_t
> >   rte_hash_count(const struct rte_hash *h)
> >   {
> > diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h
> > index 7ecc02..8ea97e297d 100644
> > --- a/lib/hash/rte_hash.h
> > +++ b/lib/hash/rte_hash.h
> > @@ -193,6 +193,19 @@ rte_hash_free(struct rte_hash *h);
> >   void
> >   rte_hash_reset(struct rte_hash *h);
> >   
> > +
> > +/**
> > + * Return the number of records in the defer queue of the hash table
> > + * if RCU is enabled.
> > + * @param h
> > + *  Hash table to query from
> > + * @return
> > + *   - -EINVAL if parameters are invalid
> > + *   - A value indicating how many records were inserted in the table.
> did you mean how many records are kept in defer queue?
> > + */
> > +int32_t
> > +rte_hash_dq_count(const struct rte_hash *h);
> > +
> >   /**
> >* Return the number of keys in the hash table
> >* @param h
> > diff --git a/lib/hash/version.map b/lib/hash/version.map
> > index 6b2afebf6b..7f7b158cf1 100644
> > --- a/lib/hash/version.map
> > +++ b/lib/hash/version.map
> > @@ -9,6 +9,7 @@ DPDK_24 {
> >   rte_hash_add_key_with_hash;
> >   rte_hash_add_key_with_hash

[PATCH v2 1/4] config/arm: add Neoverse V2 part number

2024-02-19 Thread Honnappa Nagarahalli
Add Arm Neoverse V2 CPU part number

Signed-off-by: Honnappa Nagarahalli 
Acked-by: Ruifeng Wang 
Reviewed-by: Wathsala Vithanage 
---
 config/arm/meson.build | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 36f21d2259..18b595ead1 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -100,6 +100,16 @@ part_number_config_arm = {
 ['RTE_MAX_LCORE', 128],
 ['RTE_MAX_NUMA_NODES', 2]
 ]
+},
+'0xd4f': {
+'march_features': ['sve2'],
+'compiler_options': ['-mcpu=neoverse-v2'],
+'flags': [
+['RTE_MACHINE', '"neoverse-v2"'],
+['RTE_ARM_FEATURE_ATOMICS', true],
+['RTE_MAX_LCORE', 144],
+['RTE_MAX_NUMA_NODES', 2]
+]
 }
 }
 implementer_arm = {
-- 
2.34.1



[PATCH v2 2/4] config/arm: add generic V2 SoC

2024-02-19 Thread Honnappa Nagarahalli
Add generic V2 CPU SoC. This will allow for compiling a binary
that will run on any SoC that uses V2 CPU.

Signed-off-by: Honnappa Nagarahalli 
Reviewed-by: Wathsala Vithanage 
---
 config/arm/arm64_v2_linux_gcc | 16 
 config/arm/meson.build|  9 +
 2 files changed, 25 insertions(+)
 create mode 100644 config/arm/arm64_v2_linux_gcc

diff --git a/config/arm/arm64_v2_linux_gcc b/config/arm/arm64_v2_linux_gcc
new file mode 100644
index 00..50d9be3da3
--- /dev/null
+++ b/config/arm/arm64_v2_linux_gcc
@@ -0,0 +1,16 @@
+[binaries]
+c = ['ccache', 'aarch64-linux-gnu-gcc']
+cpp = ['ccache', 'aarch64-linux-gnu-g++']
+ar = 'aarch64-linux-gnu-gcc-ar'
+strip = 'aarch64-linux-gnu-strip'
+pkgconfig = 'aarch64-linux-gnu-pkg-config'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'aarch64'
+cpu = 'armv9-a'
+endian = 'little'
+
+[properties]
+platform = 'v2'
diff --git a/config/arm/meson.build b/config/arm/meson.build
index 18b595ead1..f096ed9ebf 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -527,6 +527,13 @@ soc_bluefield3 = {
'numa': false
 }
 
+soc_v2 = {
+'description': 'Arm Neoverse V2',
+'implementer': '0x41',
+'part_number': '0xd4f',
+'numa': true
+}
+
 '''
 Start of SoCs list
 generic: Generic un-optimized build for armv8 aarch64 execution mode.
@@ -555,6 +562,7 @@ stingray:Broadcom Stingray
 thunderx2:   Marvell ThunderX2 T99
 thunderxt88: Marvell ThunderX T88
 thunderxt83: Marvell ThunderX T83
+v2:  Arm Neoverse V2
 End of SoCs list
 '''
 # The string above is included in the documentation, keep it in sync with the
@@ -586,6 +594,7 @@ socs = {
 'thunderx2': soc_thunderx2,
 'thunderxt88': soc_thunderxt88,
 'thunderxt83': soc_thunderxt83,
+'v2': soc_v2,
 }
 
 dpdk_conf.set('RTE_ARCH_ARM', 1)
-- 
2.34.1



[PATCH v2 3/4] config/arm: add NVIDIA Grace CPU

2024-02-19 Thread Honnappa Nagarahalli
Add meson build configuration for NVIDIA Grace platform
with 64-bit Arm Neoverse V2 cores.

Signed-off-by: Honnappa Nagarahalli 
Acked-by: Ruifeng Wang 
Reviewed-by: Wathsala Vithanage 
---
 config/arm/arm64_grace_linux_gcc | 16 
 config/arm/meson.build   | 10 ++
 2 files changed, 26 insertions(+)
 create mode 100644 config/arm/arm64_grace_linux_gcc

diff --git a/config/arm/arm64_grace_linux_gcc b/config/arm/arm64_grace_linux_gcc
new file mode 100644
index 00..bde55b17a8
--- /dev/null
+++ b/config/arm/arm64_grace_linux_gcc
@@ -0,0 +1,16 @@
+[binaries]
+c = ['ccache', 'aarch64-linux-gnu-gcc']
+cpp = ['ccache', 'aarch64-linux-gnu-g++']
+ar = 'aarch64-linux-gnu-gcc-ar'
+strip = 'aarch64-linux-gnu-strip'
+pkgconfig = 'aarch64-linux-gnu-pkg-config'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'aarch64'
+cpu = 'armv9-a'
+endian = 'little'
+
+[properties]
+platform = 'grace'
diff --git a/config/arm/meson.build b/config/arm/meson.build
index f096ed9ebf..606d8942ca 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -424,6 +424,14 @@ soc_tys2500 = {
 'numa': true
 }
 
+soc_grace = {
+'description': 'NVIDIA Grace',
+'implementer': '0x41',
+'part_number': '0xd4f',
+'extra_march_features': ['crypto'],
+'numa': true
+}
+
 soc_graviton2 = {
 'description': 'AWS Graviton2',
 'implementer': '0x41',
@@ -551,6 +559,7 @@ dpaa:NXP DPAA
 emag:Ampere eMAG
 ft2000plus:  Phytium FT-2000+
 tys2500: Phytium TengYun S2500
+grace:   NVIDIA Grace
 graviton2:   AWS Graviton2
 graviton3:   AWS Graviton3
 hip10:   HiSilicon HIP10
@@ -583,6 +592,7 @@ socs = {
 'emag': soc_emag,
 'ft2000plus': soc_ft2000plus,
 'tys2500': soc_tys2500,
+'grace': soc_grace,
 'graviton2': soc_graviton2,
 'graviton3': soc_graviton3,
 'hip10': soc_hip10,
-- 
2.34.1



[PATCH v2 4/4] config/arm: add AWS Graviton4 CPU

2024-02-19 Thread Honnappa Nagarahalli
Add meson build configuration for AWS Graviton4 platform
with 64-bit Arm Neoverse V2 cores.

Signed-off-by: Honnappa Nagarahalli 
Reviewed-by: Wathsala Vithanage 
---
 config/arm/arm64_graviton4_linux_gcc | 16 
 config/arm/meson.build   | 14 ++
 2 files changed, 30 insertions(+)
 create mode 100644 config/arm/arm64_graviton4_linux_gcc

diff --git a/config/arm/arm64_graviton4_linux_gcc 
b/config/arm/arm64_graviton4_linux_gcc
new file mode 100644
index 00..839224bca7
--- /dev/null
+++ b/config/arm/arm64_graviton4_linux_gcc
@@ -0,0 +1,16 @@
+[binaries]
+c = ['ccache', 'aarch64-linux-gnu-gcc']
+cpp = ['ccache', 'aarch64-linux-gnu-g++']
+ar = 'aarch64-linux-gnu-gcc-ar'
+strip = 'aarch64-linux-gnu-strip'
+pkgconfig = 'aarch64-linux-gnu-pkg-config'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'aarch64'
+cpu = 'armv9-a'
+endian = 'little'
+
+[properties]
+platform = 'graviton4'
diff --git a/config/arm/meson.build b/config/arm/meson.build
index 606d8942ca..075d6d8e88 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -447,6 +447,18 @@ soc_graviton3 = {
 'numa': false
 }
 
+soc_graviton4 = {
+'description': 'AWS Graviton4',
+'implementer': '0x41',
+'part_number': '0xd4f',
+'extra_march_features': ['crypto'],
+'flags': [
+['RTE_MAX_LCORE', 96],
+['RTE_MAX_NUMA_NODES', 1]
+],
+'numa': false
+}
+
 soc_hip10 = {
 'description': 'HiSilicon HIP10',
 'implementer': '0x48',
@@ -562,6 +574,7 @@ tys2500: Phytium TengYun S2500
 grace:   NVIDIA Grace
 graviton2:   AWS Graviton2
 graviton3:   AWS Graviton3
+graviton4:   AWS Graviton4
 hip10:   HiSilicon HIP10
 kunpeng920:  HiSilicon Kunpeng 920
 kunpeng930:  HiSilicon Kunpeng 930
@@ -595,6 +608,7 @@ socs = {
 'grace': soc_grace,
 'graviton2': soc_graviton2,
 'graviton3': soc_graviton3,
+'graviton4': soc_graviton4,
 'hip10': soc_hip10,
 'kunpeng920': soc_kunpeng920,
 'kunpeng930': soc_kunpeng930,
-- 
2.34.1



RE: [PATCH 2/3] net/ionic: remove duplicate barriers

2024-02-19 Thread Wathsala Wathawana Vithanage



> -Original Message-
> From: Wathsala Wathawana Vithanage 
> Sent: Monday, February 19, 2024 4:17 PM
> To: Andrew Boyer ; dev@dpdk.org
> Cc: Neel Patel ; nd ; Honnappa
> Nagarahalli ; nd 
> Subject: RE: [PATCH 2/3] net/ionic: remove duplicate barriers
> 
> Hi Andrew,
> 
> I think that this barrier may have been added to ensure any writes to q-
> >hw_index and q->head_idx complete before ionic_q_flush computes val.
> Dependency chains can also prevent reordering if that's the case this barrier 
> is
> not required.
> However, I have the following concern.
> 
> > diff --git a/drivers/net/ionic/ionic_main.c
> > b/drivers/net/ionic/ionic_main.c index 1f24f64a33..814bb3b8f4 100644
> > --- a/drivers/net/ionic/ionic_main.c
> > +++ b/drivers/net/ionic/ionic_main.c
> > @@ -223,7 +223,6 @@ ionic_adminq_post(struct ionic_lif *lif, struct
> > ionic_admin_ctx *ctx)
> > q->head_idx = Q_NEXT_TO_POST(q, 1);
> >
> > /* Ring doorbell */
> > -   rte_wmb();
> > ionic_q_flush(q);
> >
> >  err_out:
> 
> Ionic_q_flush(q) uses q->hw_index and q->head_idx to compute the value of
> val which it writes to the address stored in q->db. I can see that 
> q->head_idx is
> updated before the removed rte_wmb(), therefore it's safe to assume that " q-
> >head_idx = Q_NEXT_TO_POST(q, 1)" and "val = IONIC_DBELL_QID(q-
> >hw_index) | q->head_idx" will maintain the program order due to that
> dependency. But I don't know if there exists a dependency chain over q-
> >hw_index. If not, then that may have been the motive behind this barrier.
> 

Since q->hw_index is also updated on the same CPU, ionic_q_flush() will
always see the correct value; consequently, val will always be correct.
It's safe to remove this barrier.
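To make the argument concrete, a sketch (not the actual driver code;
the struct layout and the QID shift are illustrative):

#include <rte_io.h>
#include <rte_byteorder.h>

struct q_sketch {
	uint16_t hw_index;
	uint16_t head_idx;
	uint64_t *db; /* doorbell register */
};

static inline void
q_flush_sketch(struct q_sketch *q)
{
	/* This thread reads back its own prior stores: program order
	 * guarantees the updated head_idx/hw_index are observed, so no
	 * rte_wmb() is needed before computing val.
	 */
	uint64_t val = ((uint64_t)q->hw_index << 24) | q->head_idx;

	/* What must be ordered is the descriptor-ring stores versus
	 * this MMIO write; rte_write64() already contains that barrier,
	 * which is why the separate rte_wmb() was a duplicate.
	 */
	rte_write64(rte_cpu_to_le_64(val), q->db);
}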

> > diff --git a/drivers/net/ionic/ionic_rxtx_sg.c
> > b/drivers/net/ionic/ionic_rxtx_sg.c
> > index e8dec99c04..3820fd850f 100644
> > --- a/drivers/net/ionic/ionic_rxtx_sg.c
> > +++ b/drivers/net/ionic/ionic_rxtx_sg.c
> > @@ -218,7 +218,6 @@ ionic_xmit_pkts_sg(void *tx_queue, struct
> rte_mbuf
> > **tx_pkts,
> > }
> >
> > if (nb_tx > 0) {
> > -   rte_wmb();
> > ionic_txq_flush(q);
> >
> > txq->last_wdog_cycles = rte_get_timer_cycles(); diff --git
> > a/drivers/net/ionic/ionic_rxtx_simple.c
> > b/drivers/net/ionic/ionic_rxtx_simple.c
> > index 9674b4d1df..4c9b9415ad 100644
> > --- a/drivers/net/ionic/ionic_rxtx_simple.c
> > +++ b/drivers/net/ionic/ionic_rxtx_simple.c
> > @@ -191,7 +191,6 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf
> > **tx_pkts,
> > }
> >
> > if (nb_tx > 0) {
> > -   rte_wmb();
> > ionic_txq_flush(q);
> >
> > txq->last_wdog_cycles = rte_get_timer_cycles();
> > --
> > 2.17.1


Re: [PATCH 3/3] net/ionic: add vdev support for embedded applications

2024-02-19 Thread Honnappa Nagarahalli


> On Feb 16, 2024, at 11:07 AM, Andrew Boyer  wrote:
> 
> Add support for running DPDK applications directly on AMD Pensando
> embedded HW. The platform exposes the device BARs through UIO. The
> UIO code in the common/ionic library walks the sysfs filesystem
> to identify the relevant BARs and map them into process memory.
> 
> The SoCs are named 'Capri' and 'Elba'.
> 
> The vdev device interface code is located in ionic_dev_vdev.c.
> 
> Some datapath operations are #ifdef-ed out to save on resources when
> running in embedded mode.
> 
> Some controlpath operations are skipped by the ionic_is_embedded()
> helper function.
> 
> Before ringing the doorbell, use an ARM 'dsb st' barrier. The normal
> barrier inside rte_write64() is insufficient on these devices due to
> a chip errata.
> 
> Signed-off-by: Andrew Boyer 
> Signed-off-by: Neel Patel 
> Signed-off-by: R Mohamed Shah 
> Signed-off-by: Alfredo Cardigliano 
> ---
> config/arm/arm64_capri_linux_gcc   |  16 +++
> config/arm/arm64_elba_linux_gcc|  16 +++
> config/arm/meson.build |  42 
> drivers/net/ionic/ionic.h  |   2 +-
> drivers/net/ionic/ionic_dev.h  |  17 
> drivers/net/ionic/ionic_dev_vdev.c | 156 +
> drivers/net/ionic/ionic_ethdev.c   |   7 ++
> drivers/net/ionic/ionic_lif.c  |  19 
> drivers/net/ionic/ionic_rxtx.h |   4 +
> drivers/net/ionic/meson.build  |   1 +
> 10 files changed, 279 insertions(+), 1 deletion(-)
> create mode 100644 config/arm/arm64_capri_linux_gcc
> create mode 100644 config/arm/arm64_elba_linux_gcc
> create mode 100644 drivers/net/ionic/ionic_dev_vdev.c
> 
> diff --git a/config/arm/arm64_capri_linux_gcc 
> b/config/arm/arm64_capri_linux_gcc
> new file mode 100644
> index 00..1a6313e684
> --- /dev/null
> +++ b/config/arm/arm64_capri_linux_gcc
> @@ -0,0 +1,16 @@
> +[binaries]
> +c = ['ccache', 'aarch64-linux-gnu-gcc']
> +cpp = ['ccache', 'aarch64-linux-gnu-g++']
> +ar = 'aarch64-linux-gnu-gcc-ar'
> +strip = 'aarch64-linux-gnu-strip'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +pcap-config = ''
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +platform = 'capri'
> diff --git a/config/arm/arm64_elba_linux_gcc b/config/arm/arm64_elba_linux_gcc
> new file mode 100644
> index 00..4d891bd5a7
> --- /dev/null
> +++ b/config/arm/arm64_elba_linux_gcc
> @@ -0,0 +1,16 @@
> +[binaries]
> +c = ['ccache', 'aarch64-linux-gnu-gcc']
> +cpp = ['ccache', 'aarch64-linux-gnu-g++']
> +ar = 'aarch64-linux-gnu-gcc-ar'
> +strip = 'aarch64-linux-gnu-strip'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +pcap-config = ''
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +platform = 'elba'
> diff --git a/config/arm/meson.build b/config/arm/meson.build
> index 36f21d2259..2326021fed 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -165,6 +165,33 @@ implementer_cavium = {
> }
> }
> 
> +implementer_ionic = {
> +'description': 'AMD Pensando',
> +'flags': [
> +['RTE_MAX_NUMA_NODES', 1],
> +['RTE_CACHE_LINE_SIZE', 64],
> +['RTE_LIBRTE_VHOST_NUMA', false],
> +['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> +['RTE_LIBRTE_IONIC_PMD_EMBEDDED', true],
> +],
> +'part_number_config': {
> +'0xc1': {
> +'compiler_options':  ['-mcpu=cortex-a72'],
> +'flags': [
> +['RTE_MAX_LCORE', 4],
> +['RTE_LIBRTE_IONIC_PMD_BARRIER_ERRATA', true],
> +]
> +},
> +'0xc2': {
> +'compiler_options':  ['-mcpu=cortex-a72'],
> +'flags': [
> +['RTE_MAX_LCORE', 16],
> +['RTE_LIBRTE_IONIC_PMD_BARRIER_ERRATA', true],
> +]
> +}
> +}
> +}

Can you place it such that it is ordered alphabetically? (I understand
that currently things are not ordered alphabetically; I have plans to
fix that.)

> +
> implementer_ampere = {
> 'description': 'Ampere Computing',
> 'flags': [
> @@ -294,6 +321,7 @@ implementers = {
> '0x50': implementer_ampere,
> '0x51': implementer_qualcomm,
> '0x70': implementer_phytium,
> +'0x75': implementer_ionic,
> '0xc0': implementer_ampere,
> }
> 
> @@ -517,6 +545,18 @@ soc_bluefield3 = {
>'numa': false
> }
> 
> +soc_ionic_capri = {
> +'description': 'AMD Pensando Capri',
> +'implementer': '0x75',
> +'part_number': '0xc1'
> +}
> +
> +soc_ionic_elba = {
> +'description': 'AMD Pensando Elba',
> +'implementer': '0x75',
> +'part_number': '0xc2'
> +}
> +
> '''
> Start of SoCs list
> generic: Generic un-optimized build for armv8 aarch64 execution mode.
> @@ -576,6 +616,8 @@ socs = {
> 'thunderx2': soc_thunderx2,
> 'thunderxt88': soc_thunderxt88,
> 'thunderxt83': soc_thunderxt83,
> +'capri': soc_ionic_

[PATCH] examples/dma: fix max-frame-size cannot be zero

2024-02-19 Thread Chengwen Feng
In the original implementation, max_frame_size could be zero, but
commit ("examples/dma: replace getopt with argparse") treats zero as an
error. This commit fixes that.

Also, since an unsigned value cannot be negative, adjust the "<= 0"
checks to "== 0".

Fixes: 8d85afb19af7 ("examples/dma: replace getopt with argparse")

Reported-by: Jiang, YuX 
Signed-off-by: Chengwen Feng 
---
 examples/dma/dmafwd.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/dma/dmafwd.c b/examples/dma/dmafwd.c
index f4a0bff06e..acceae6b7b 100644
--- a/examples/dma/dmafwd.c
+++ b/examples/dma/dmafwd.c
@@ -695,23 +695,23 @@ dma_parse_args(int argc, char **argv, unsigned int 
nb_ports)
return ret;
 
/* check argument's value which parsing by autosave. */
-   if (dma_batch_sz <= 0 || dma_batch_sz > MAX_PKT_BURST) {
+   if (dma_batch_sz == 0 || dma_batch_sz > MAX_PKT_BURST) {
printf("Invalid dma batch size, %d.\n", dma_batch_sz);
return -1;
}
 
-   if (max_frame_size <= 0 || max_frame_size > 
RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
+   if (max_frame_size > RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
printf("Invalid max frame size, %d.\n", max_frame_size);
return -1;
}
 
-   if (nb_queues <= 0 || nb_queues > MAX_RX_QUEUES_COUNT) {
+   if (nb_queues == 0 || nb_queues > MAX_RX_QUEUES_COUNT) {
printf("Invalid RX queues number %d. Max %u\n",
nb_queues, MAX_RX_QUEUES_COUNT);
return -1;
}
 
-   if (ring_size <= 0) {
+   if (ring_size == 0) {
printf("Invalid ring size, %d.\n", ring_size);
return -1;
}
@@ -721,7 +721,7 @@ dma_parse_args(int argc, char **argv, unsigned int nb_ports)
ring_size = MBUF_RING_SIZE;
}
 
-   if (stats_interval <= 0) {
+   if (stats_interval == 0) {
printf("Invalid stats interval, setting to 1\n");
stats_interval = 1; /* set to default */
}
-- 
2.17.1
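A small standalone demonstration of the unsigned-comparison point from
the commit message:

#include <stdio.h>

int
main(void)
{
	unsigned int ring_size = 0;

	/* For an unsigned value, "<= 0" can only match zero, which is
	 * why the patch spells it "== 0" for clarity.
	 */
	if (ring_size <= 0)
		printf("ring_size is zero\n");

	/* "ring_size < 0" would always be false for an unsigned type;
	 * GCC flags such comparisons with -Wextra/-Wtype-limits.
	 */
	return 0;
}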



[DPDK/ethdev Bug 1381] TAP device can not support 17 queues

2024-02-19 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1381

Bug ID: 1381
   Summary: TAP device can not support 17 queues
   Product: DPDK
   Version: 23.11
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: ethdev
  Assignee: dev@dpdk.org
  Reporter: step...@networkplumber.org
  Target Milestone: ---

If you try:
# dpdk-testpmd --log-level=pmd.net.tap:debug -l 1-2 --vdev=net_tap0 -- -i
--rxq=8 --txq=8

It will fail because:
EAL: Detected CPU lcores: 8
EAL: Detected NUMA nodes: 1
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
rte_pmd_tap_probe(): Initializing pmd_tap for net_tap0
eth_dev_tap_create(): TAP device on numa 0
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tun_alloc(): Using rt-signal 35
eth_dev_tap_create(): allocated dtap0
Interactive-mode selected
testpmd: create a new mbuf pool : n=155456, size=2176, socket=0
testpmd: preferred mempool ops selected: ring_mp_mc

Warning! port-topology=paired and odd forward ports number, the last port will
pair with itself.

Configuring Port 0 (socket 0)
tap_dev_configure(): net_tap0: dtap0: TX configured queues number: 8
tap_dev_configure(): net_tap0: dtap0: RX configured queues number: 8
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 0 fd 26
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 0 on fd 26 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 1 fd 212
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 1 on fd 212 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 2 fd 213
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 2 on fd 213 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 3 fd 214
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 3 on fd 214 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 4 fd 215
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 4 on fd 215 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 5 fd 216
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 5 on fd 216 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 6 fd 217
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 6 on fd 217 csum off
tun_alloc(): /dev/net/tun Features 7173
tun_alloc():   Multi-queue support for 16 queues
tun_alloc(): Device name is 'dtap0'
tap_setup_queue(): dtap0: add tx queue for qid 7 fd 218
tap_tx_queue_setup():   TX TUNTAP device name dtap0, qid 7 on fd 218 csum off
tap_setup_queue(): dtap0: dup fd 26 for rx queue qid 0 (219)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 0 on fd 219
tap_setup_queue(): dtap0: dup fd 212 for rx queue qid 1 (220)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 1 on fd 220
tap_setup_queue(): dtap0: dup fd 213 for rx queue qid 2 (221)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 2 on fd 221
tap_setup_queue(): dtap0: dup fd 214 for rx queue qid 3 (222)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 3 on fd 222
tap_setup_queue(): dtap0: dup fd 215 for rx queue qid 4 (223)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 4 on fd 223
tap_setup_queue(): dtap0: dup fd 216 for rx queue qid 5 (224)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 5 on fd 224
tap_setup_queue(): dtap0: dup fd 217 for rx queue qid 6 (225)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 6 on fd 225
tap_setup_queue(): dtap0: dup fd 218 for rx queue qid 7 (226)
tap_rx_queue_setup():   RX TUNTAP device name dtap0, qid 7 on fd 226
EAL: Cannot send more than 8 FDs
tap_mp_req_on_rxtx(): Failed to send start req to secondary 7

This is a regression caused by:
commit c36ce7099c2187926cd62cff7ebd479823554929
Author: Kumara Parameshwaran 
Date:   Mon Jan 31 20:02:34 2022 +0530

net/tap: fix to populate FDs in secondary process

When a tap device is hotplugged to primary process which in turn
adds the device to all sec
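
For context on the failure: the EAL multi-process channel limits a single
message to RTE_MP_MAX_FD_NUM (8) file descriptors, which is what the
"Cannot send more than 8 FDs" log above trips over once 8 Tx and 8
duplicated Rx queue fds must be handed to the secondary in one request. A
minimal sketch of splitting such a hand-off into compliant chunks follows;
the helper name and surrounding protocol are illustrative, not the actual
fix.

#include <string.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_string_fns.h>

/* Illustrative helper: send nb_fds descriptors to the peer in chunks of
 * RTE_MP_MAX_FD_NUM, the per-message limit of the EAL mp channel. */
static int
send_fds_chunked(const char *action, const int *fds, int nb_fds)
{
	struct rte_mp_msg msg;
	int sent = 0;

	while (sent < nb_fds) {
		int chunk = RTE_MIN(nb_fds - sent, RTE_MP_MAX_FD_NUM);

		memset(&msg, 0, sizeof(msg));
		rte_strlcpy(msg.name, action, sizeof(msg.name));
		msg.num_fds = chunk;
		memcpy(msg.fds, fds + sent, chunk * sizeof(int));

		if (rte_mp_sendmsg(&msg) < 0)
			return -rte_errno;
		sent += chunk;
	}
	return 0;
}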

Re: [PATCH] net/gve: Change ERR to DEBUG to prevent flooding of logs for Tx-Dqo.

2024-02-19 Thread Rushil Gupta
I agree.
This bug had manifested for a while before I partially fixed it in "[PATCH]
net/gve: fix dqo bug for chained descriptors". However, for higher queue
counts (> 13) we still see this behavior. I'll add a statistic.

On Mon, Feb 19, 2024, 10:56 PM Stephen Hemminger 
wrote:

> On Mon, 19 Feb 2024 02:44:35 +
> Rushil Gupta  wrote:
>
> > This was causing failures for testpmd runs (for queues >= 15),
> > presumably due to flooding of logs caused by the descriptor ring being
> > overwritten.
> >
> > Fixes: a01854 ("net/gve: fix dqo bug for chained descriptors")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Rushil Gupta 
> > Reviewed-by: Joshua Washington 
>
> Isn't this still an error? If the descriptor is overwritten, is there
> an mbuf leak?
> Maybe a statistic would be better than a message?
>
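
A minimal sketch of the counter-based alternative mentioned here; the
struct and field names are illustrative, not gve's actual definitions. The
idea is to bump a per-queue counter on the anomaly (exported via the xstats
API) and keep any per-packet log at DEBUG level.

#include <stdint.h>

/* Illustrative per-queue error statistics. */
struct sketch_rxq_stats {
	uint64_t ring_overwrite_errors;
};

static inline void
sketch_count_ring_overwrite(struct sketch_rxq_stats *stats)
{
	/* O(1) per event regardless of traffic rate, unlike a log line. */
	stats->ring_overwrite_errors++;
}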


Re: [PATCH] net/hns3: fix Rx packet truncation when KEEP CRC enabled

2024-02-19 Thread Jie Hai

Hi, Ferruh,

Thanks for your review.

On 2024/2/7 22:15, Ferruh Yigit wrote:

On 2/6/2024 1:10 AM, Jie Hai wrote:

From: Dengdui Huang 

When KEEP_CRC offload is enabled, some packets are truncated and
the CRC is still stripped in the following cases:
1. For HIP08 hardware, when the packet type is TCP and the length
is less than or equal to 60B.
2. For other hardware, when the packet type is IP and the length
is less than or equal to 60B.



If a device doesn't support the offload for some packets, one option is
to disable the offload for that device entirely, instead of calculating
the CRC in software and appending it.


The KEEP_CRC feature of hns3 is faulty only for the specific packet
types and small packets (<60B) described above.
What's more, such small Ethernet packets are not common.


Unless you have a specific use case or requirement to support the offload.


Yes, some users of hns3 are already using this feature,
so we cannot drop this offload.


<...>


@@ -2492,10 +2544,16 @@ hns3_recv_pkts_simple(void *rx_queue,
 			goto pkt_err;
 
 		rxm->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);
-
 		if (rxm->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC)
 			rxm->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP;
 
+		if (unlikely(rxq->crc_len > 0)) {
+			if (hns3_need_recalculate_crc(rxq, rxm))
+				hns3_recalculate_crc(rxq, rxm);
+			rxm->pkt_len -= rxq->crc_len;
+			rxm->data_len -= rxq->crc_len;


Removing 'crc_len' from 'mbuf->pkt_len' & 'mbuf->data_len' is
practically the same as stripping the CRC.

We don't count the CRC length in the statistics, but it should remain
accessible in the payload for the user.

Our drivers are behaving exactly as you say.
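
For illustration, under the behaviour described above (the 4 FCS bytes stay
in the mbuf data room but are no longer counted in pkt_len and data_len),
an application could still read the CRC with a sketch like the following;
it assumes a single-segment mbuf with the CRC located immediately after the
data.

#include <stdint.h>
#include <string.h>
#include <rte_mbuf.h>

/* Sketch only: read the trailing FCS of a single-segment mbuf whose
 * data_len has already had the 4-byte CRC subtracted. */
static inline uint32_t
read_trailing_crc(const struct rte_mbuf *m)
{
	const uint8_t *crc = rte_pktmbuf_mtod_offset(m, const uint8_t *,
						     m->data_len);
	uint32_t fcs;

	memcpy(&fcs, crc, sizeof(fcs)); /* unaligned-safe copy */
	return fcs;
}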


.


[PATCH 0/4] cfgfile: enhance error detecting

2024-02-19 Thread Chengwen Feng
When I was trying to debug a problem introduced by config.ini in
test-dma-perf, I found that the cfgfile library's error detection should
be enhanced, which led to this patchset.

Chengwen Feng (4):
  cfgfile: remove dead code
  cfgfile: support verify name and value
  cfgfile: verify add section and entry result
  cfgfile: add unique name flag

 lib/cfgfile/rte_cfgfile.c | 70 +--
 lib/cfgfile/rte_cfgfile.h |  7 
 2 files changed, 59 insertions(+), 18 deletions(-)

-- 
2.17.1
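
As an illustration of the class of mistake this patchset targets, here is a
made-up config.ini fragment in the style used by test-dma-perf: today both
duplicate names below parse silently, while this series lets them be
detected and rejected.

; Both duplicate names below parse silently today; with the checks in
; this series (see CFG_FLAG_UNIQUE_NAME in patch 4/4) they are rejected.
[case1]
type=DMA_MEM_COPY
type=CPU_MEM_COPY

[case1]
mem_size=10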



[PATCH 3/4] cfgfile: verify add section and entry result

2024-02-19 Thread Chengwen Feng
The results of rte_cfgfile_add_section() and _add_entry() were not
verified, so potential errors could go undetected.

This commit adds the verification.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/cfgfile/rte_cfgfile.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index 6f8f46a406..ad9314dc14 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -182,6 +182,7 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
char buffer[CFG_NAME_LEN + CFG_VALUE_LEN + 4];
int lineno = 0;
struct rte_cfgfile *cfg;
+   int ret;
 
if (rte_cfgfile_check_params(params))
return NULL;
@@ -226,7 +227,9 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
*end = '\0';
_strip(&buffer[1], end - &buffer[1]);
 
-   rte_cfgfile_add_section(cfg, &buffer[1]);
+   ret = rte_cfgfile_add_section(cfg, &buffer[1]);
+   if (ret != 0)
+   goto error1;
} else {
/* key and value line */
char *split[2] = {NULL};
@@ -261,8 +264,10 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
if (cfg->num_sections == 0)
goto error1;
 
-   _add_entry(&cfg->sections[cfg->num_sections - 1],
-   split[0], split[1]);
+   ret = _add_entry(&cfg->sections[cfg->num_sections - 1],
+split[0], split[1]);
+   if (ret != 0)
+   goto error1;
}
}
fclose(f);
@@ -277,6 +282,7 @@ struct rte_cfgfile *
 rte_cfgfile_create(int flags)
 {
int i;
+   int ret;
struct rte_cfgfile *cfg;
 
/* future proof flags usage */
@@ -310,8 +316,11 @@ rte_cfgfile_create(int flags)
cfg->sections[i].allocated_entries = CFG_ALLOC_ENTRY_BATCH;
}
 
-   if (flags & CFG_FLAG_GLOBAL_SECTION)
-   rte_cfgfile_add_section(cfg, "GLOBAL");
+   if (flags & CFG_FLAG_GLOBAL_SECTION) {
+   ret = rte_cfgfile_add_section(cfg, "GLOBAL");
+   if (ret != 0)
+   goto error1;
+   }
 
return cfg;
 error1:
-- 
2.17.1
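
A short usage sketch of what this buys callers (the file name is
illustrative): failed section or entry insertion now surfaces as a NULL
return from the load functions, so a single check is enough.

#include <stdio.h>
#include <stdlib.h>
#include <rte_cfgfile.h>

static struct rte_cfgfile *
load_cfg_or_die(const char *path)
{
	struct rte_cfgfile *cfg = rte_cfgfile_load(path, 0);

	if (cfg == NULL) {
		/* covers I/O and parse errors and, with this patch,
		 * failed section/entry insertion as well */
		fprintf(stderr, "cannot parse %s\n", path);
		exit(EXIT_FAILURE);
	}
	return cfg;
}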



[PATCH 1/4] cfgfile: remove dead code

2024-02-19 Thread Chengwen Feng
This memchr() will never return NULL, because the rte_cfgfile_load()
function skips lines without useful content.

Fixes: 74e0d3a17461 ("cfgfile: fix null pointer dereference in parsing")
Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/cfgfile/rte_cfgfile.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index 6a5e4fd942..d7aa38b56f 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -223,12 +223,6 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
 
split[0] = buffer;
split[1] = memchr(buffer, '=', len);
-   if (split[1] == NULL) {
-   CFG_LOG(ERR,
-   "line %d - no '=' character found",
-   lineno);
-   goto error1;
-   }
*split[1] = '\0';
split[1]++;
 
-- 
2.17.1



[PATCH 2/4] cfgfile: support verify name and value

2024-02-19 Thread Chengwen Feng
This patch adds validity checks for the section name, the entry name,
and the entry value.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/cfgfile/rte_cfgfile.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index d7aa38b56f..6f8f46a406 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -105,6 +105,16 @@ static int
 _add_entry(struct rte_cfgfile_section *section, const char *entryname,
const char *entryvalue)
 {
+   int name_len, value_len;
+
+   name_len = strlen(entryname);
+   value_len = strlen(entryvalue);
+   if (name_len == 0 || name_len >= CFG_NAME_LEN || value_len >= CFG_VALUE_LEN) {
+   CFG_LOG(ERR, "invalid entry name %s or value %s in section %s",
+   entryname, entryvalue, section->name);
+   return -EINVAL;
+   }
+
/* resize entry structure if we don't have room for more entries */
if (section->num_entries == section->allocated_entries) {
struct rte_cfgfile_entry *n_entries = realloc(
@@ -322,6 +332,7 @@ rte_cfgfile_create(int flags)
 int
 rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
 {
+   int len;
int i;
 
if (cfg == NULL)
@@ -330,6 +341,12 @@ rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
if (sectionname == NULL)
return -EINVAL;
 
+   len = strlen(sectionname);
+   if (len == 0 || len >= CFG_NAME_LEN) {
+   CFG_LOG(ERR, "invalid section name %s", sectionname);
+   return -EINVAL;
+   }
+
/* resize overall struct if we don't have room for more sections */
if (cfg->num_sections == cfg->allocated_sections) {
 
-- 
2.17.1
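
A sketch of the new behaviour from the caller's side, relying on the
CFG_NAME_LEN limit from rte_cfgfile.h: a name whose length reaches the
buffer size is now rejected with -EINVAL up front instead of being
accepted and truncated later.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <rte_cfgfile.h>

int
main(void)
{
	char long_name[CFG_NAME_LEN + 1];
	struct rte_cfgfile *cfg = rte_cfgfile_create(0);

	/* strlen(long_name) == CFG_NAME_LEN: one byte too long to be
	 * stored together with its terminating NUL */
	memset(long_name, 'x', CFG_NAME_LEN);
	long_name[CFG_NAME_LEN] = '\0';

	if (cfg != NULL &&
	    rte_cfgfile_add_section(cfg, long_name) == -EINVAL)
		printf("over-long section name rejected, as expected\n");
	rte_cfgfile_close(cfg);
	return 0;
}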



[PATCH 4/4] cfgfile: add unique name flag

2024-02-19 Thread Chengwen Feng
The cfgfile library accepts duplicate section names and entry names
when parsing a config file, which can be confusing and hard to debug
when a duplicate name is set accidentally.

So add a unique name flag: with it, a duplicate section name or entry
name is treated as an error.

Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
---
 lib/cfgfile/rte_cfgfile.c | 32 +++-
 lib/cfgfile/rte_cfgfile.h |  7 +++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/lib/cfgfile/rte_cfgfile.c b/lib/cfgfile/rte_cfgfile.c
index ad9314dc14..9e901b0977 100644
--- a/lib/cfgfile/rte_cfgfile.c
+++ b/lib/cfgfile/rte_cfgfile.c
@@ -102,8 +102,8 @@ _get_section(struct rte_cfgfile *cfg, const char *sectionname)
 }
 
 static int
-_add_entry(struct rte_cfgfile_section *section, const char *entryname,
-   const char *entryvalue)
+_add_entry(struct rte_cfgfile *cfg, struct rte_cfgfile_section *section,
+  const char *entryname, const char *entryvalue, bool check_dup)
 {
int name_len, value_len;
 
@@ -115,6 +115,14 @@ _add_entry(struct rte_cfgfile_section *section, const char *entryname,
return -EINVAL;
}
 
+   if (check_dup) {
+   if (rte_cfgfile_has_entry(cfg, section->name, entryname) != 0) {
+   CFG_LOG(ERR, "duplicate entry name %s in section %s",
+   entryname, section->name);
+   return -EEXIST;
+   }
+   }
+
/* resize entry structure if we don't have room for more entries */
if (section->num_entries == section->allocated_entries) {
struct rte_cfgfile_entry *n_entries = realloc(
@@ -264,8 +272,9 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
if (cfg->num_sections == 0)
goto error1;
 
-   ret = _add_entry(&cfg->sections[cfg->num_sections - 1],
-split[0], split[1]);
+   ret = _add_entry(cfg, &cfg->sections[cfg->num_sections - 1],
+split[0], split[1],
+!!(flags & CFG_FLAG_UNIQUE_NAME));
if (ret != 0)
goto error1;
}
@@ -286,7 +295,8 @@ rte_cfgfile_create(int flags)
struct rte_cfgfile *cfg;
 
/* future proof flags usage */
-   if (flags & ~(CFG_FLAG_GLOBAL_SECTION | CFG_FLAG_EMPTY_VALUES))
+   if (flags & ~(CFG_FLAG_GLOBAL_SECTION | CFG_FLAG_EMPTY_VALUES |
+ CFG_FLAG_UNIQUE_NAME))
return NULL;
 
cfg = malloc(sizeof(*cfg));
@@ -356,6 +366,13 @@ rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
return -EINVAL;
}
 
+   if (cfg->flags & CFG_FLAG_UNIQUE_NAME) {
+   if (rte_cfgfile_has_section(cfg, sectionname) != 0) {
+   CFG_LOG(ERR, "duplicate section name %s", sectionname);
+   return -EEXIST;
+   }
+   }
+
/* resize overall struct if we don't have room for more sections */
if (cfg->num_sections == cfg->allocated_sections) {
 
@@ -396,16 +413,13 @@ int rte_cfgfile_add_entry(struct rte_cfgfile *cfg,
|| (entryvalue == NULL))
return -EINVAL;
 
-   if (rte_cfgfile_has_entry(cfg, sectionname, entryname) != 0)
-   return -EEXIST;
-
/* search for section pointer by sectionname */
struct rte_cfgfile_section *curr_section = _get_section(cfg,
sectionname);
if (curr_section == NULL)
return -EINVAL;
 
-   ret = _add_entry(curr_section, entryname, entryvalue);
+   ret = _add_entry(cfg, curr_section, entryname, entryvalue, true);
 
return ret;
 }
diff --git a/lib/cfgfile/rte_cfgfile.h b/lib/cfgfile/rte_cfgfile.h
index 232c65c77b..74057c1b73 100644
--- a/lib/cfgfile/rte_cfgfile.h
+++ b/lib/cfgfile/rte_cfgfile.h
@@ -56,6 +56,13 @@ enum {
 * be zero length (e.g., "key=").
 */
CFG_FLAG_EMPTY_VALUES = 2,
+
+   /**
+* Indicates that the file must have unique section names AND unique
+* entry names. If a duplicate name is detected, the operation will
+* return an error.
+*/
+   CFG_FLAG_UNIQUE_NAME = 4,
 };
 /**@} */
 
-- 
2.17.1
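
A usage sketch for the new flag (the file name is illustrative): opting in
makes a duplicate section or entry name fail the whole load instead of
parsing silently.

#include <stdio.h>
#include <rte_cfgfile.h>

static struct rte_cfgfile *
load_strict(const char *path)
{
	/* With CFG_FLAG_UNIQUE_NAME, a duplicate section or entry name
	 * makes the load fail instead of being accepted silently. */
	struct rte_cfgfile *cfg = rte_cfgfile_load(path,
						   CFG_FLAG_UNIQUE_NAME);

	if (cfg == NULL)
		fprintf(stderr, "%s: parse error or duplicate name\n", path);
	return cfg;
}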



RE: [PATCH 1/1] net/mana: add vlan tagging support

2024-02-19 Thread Wei Hu


> -Original Message-
> From: Ferruh Yigit 
> Sent: Saturday, February 10, 2024 12:06 AM
> To: Wei Hu ; andrew.rybche...@oktetlabs.ru; Thomas
> Monjalon ; Long Li 
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 1/1] net/mana: add vlan tagging support
> 
> On 2/9/2024 8:52 AM, Wei Hu wrote:
> > For tx path, use LONG_PACKET_FORMAT if vlan tag is present. For rx,
> > extract vlan id from oob, put into mbuf and set the vlan flags in
> > mbuf.
> >
> > Also add myself to the maintainers list for vmbus, mana and netvsc.
> >
> > Signed-off-by: Wei Hu 
> > ---
> >  MAINTAINERS |  3 +++
> >  drivers/net/mana/mana.h |  2 ++
> >  drivers/net/mana/rx.c   |  6 ++
> >  drivers/net/mana/tx.c   | 30 +++---
> >  4 files changed, 38 insertions(+), 3 deletions(-)
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index 5fb3a73f84..9983d013a6
> > 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -608,6 +608,7 @@ F: app/test/test_vdev.c
> >
> >  VMBUS bus driver
> >  M: Long Li 
> > +M: Wei Hu 
> >  F: drivers/bus/vmbus/
> >
> >
> > @@ -867,6 +868,7 @@ F: doc/guides/nics/features/mlx5.ini
> >
> >  Microsoft mana
> >  M: Long Li 
> > +M: Wei Hu 
> >  F: drivers/net/mana/
> >  F: doc/guides/nics/mana.rst
> >  F: doc/guides/nics/features/mana.ini
> > @@ -878,6 +880,7 @@ F: doc/guides/nics/vdev_netvsc.rst
> >
> >  Microsoft Hyper-V netvsc
> >  M: Long Li 
> > +M: Wei Hu 
> >  F: drivers/net/netvsc/
> >  F: doc/guides/nics/netvsc.rst
> >  F: doc/guides/nics/features/netvsc.ini
> >
> 
> Hi Wei,
> 
> Can you please separate the maintainer file update into its own patch?

Sure. I will remove the maintainer file update and send it out later separately.

Thanks,
Wei


RE: [PATCH 1/1] net/mana: add vlan tagging support

2024-02-19 Thread Wei Hu



> -Original Message-
> From: Long Li 
> Sent: Saturday, February 10, 2024 2:48 AM
> To: Wei Hu ; ferruh.yi...@amd.com;
> andrew.rybche...@oktetlabs.ru; Thomas Monjalon
> ; Alan Elder 
> Cc: dev@dpdk.org
> Subject: RE: [PATCH 1/1] net/mana: add vlan tagging support
> 
> > +   if (oob->rx_vlan_tag_present) {
> > +   mbuf->ol_flags |=
> > +   RTE_MBUF_F_RX_VLAN |
> > RTE_MBUF_F_RX_VLAN_STRIPPED;
> > +   mbuf->vlan_tci = oob->rx_vlan_id;
> > +   }
> > +
> 
> Netvsc has the following code for dealing with vlan on RX mbufs (in 
> hn_rxtx.c):
> /* NDIS always strips tag, put it back if necessary */
> if (!hv->vlan_strip && rte_vlan_insert(&m)) {
> 
> It seems we should do the same?

Not sure if we want to do the same, for two reasons.

1. Searching the netvsc source, I don't see any place that sets
hv->vlan_strip to false. That means !hv->vlan_strip is always false, and
rte_vlan_insert(&m) never runs.

2. Usually vlan_strip can be set to true or false if the hardware
supports this feature. In the mana case, the hardware strips off the
vlan tag anyway; there is no way to tell the mana hardware to keep the
tag. Adding the tag back in software not only slows things down, it also
complicates the code and testing. I am not sure any real application
needs it.

I am open to adding it, but in my opinion we don't need it. Let me know
what you think.
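
For reference, a sketch of the netvsc-style software fallback being
discussed; whether it is worth wiring up is exactly the open question, and
the opt-in flag here is illustrative.

#include <stdbool.h>
#include <rte_ether.h>
#include <rte_mbuf.h>

/* Sketch: if the HW always strips the tag but the application wants it
 * inline, re-insert it from mbuf->vlan_tci in software. */
static int
maybe_reinsert_vlan(struct rte_mbuf **m, bool want_tag_inline)
{
	if (want_tag_inline &&
	    ((*m)->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED))
		return rte_vlan_insert(m);
	return 0;
}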
> 
> > pkts[pkt_received++] = mbuf;
> > rxq->stats.packets++;
> > rxq->stats.bytes += mbuf->data_len; diff --git
> > a/drivers/net/mana/tx.c b/drivers/net/mana/tx.c index
> > 58c4a1d976..f075fcb0f5 100644
> > --- a/drivers/net/mana/tx.c
> > +++ b/drivers/net/mana/tx.c
> > @@ -180,6 +180,15 @@ get_vsq_frame_num(uint32_t vsq)
> > return v.vsq_frame;
> >  }
> >
> > +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
> > +#define VLAN_PRIO_SHIFT13
> > +#define VLAN_CFI_MASK  0x1000 /* Canonical Format Indicator
> > / Drop Eligible Indicator */
> > +#define VLAN_VID_MASK  0x0fff /* VLAN Identifier */
> > +
> > +#define mana_mbuf_vlan_tag_get_id(m)   ((m)->vlan_tci &
> > VLAN_VID_MASK)
> > +#define mana_mbuf_vlan_tag_get_cfi(m)  (!!((m)->vlan_tci &
> > VLAN_CFI_MASK))
> > +#define mana_mbuf_vlan_tag_get_prio(m) (((m)->vlan_tci &
> > VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
> > +
> 
> Those definitions look like those in @Alan Elder's patch for netvsc. Can we
> consolidate some of those definitions into a common place?
> 
> Maybe in "lib/net/rte_ether.h"?
> 

Ok. Will add it to rte_ether.h.
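
One hypothetical shape for such shared helpers in rte_ether.h, purely
illustrative; the eventual patch may pick different names.

/* 802.1Q TCI layout: PCP (3 bits) | DEI/CFI (1 bit) | VID (12 bits) */
#define RTE_VLAN_TCI_ID(tci)	((tci) & 0x0fff)
#define RTE_VLAN_TCI_DEI(tci)	(!!((tci) & 0x1000))
#define RTE_VLAN_TCI_PRI(tci)	(((tci) & 0xe000) >> 13)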

Thanks,
Wei


> Thanks,
> 
> Long


RE: [PATCH] net/nfp: add support of UDP fragmentation offload

2024-02-19 Thread Chaoyong He
> On 2/17/2024 1:54 AM, Chaoyong He wrote:
> > Add support for the UDP fragmentation offload feature.
> >
> > Signed-off-by: Chaoyong He 
> > Reviewed-by: Long Wu 
> > Reviewed-by: Peng Zhang 
> 
> <...>
> 
> > diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
> > index 72c9a41b00..99c319eb2d 100644
> > --- a/drivers/net/nfp/nfp_net_common.c
> > +++ b/drivers/net/nfp/nfp_net_common.c
> > @@ -312,7 +312,7 @@ nfp_net_log_device_information(const struct nfp_net_hw *hw)
> > hw->ver.major, hw->ver.minor, hw->max_mtu);
> >
> > PMD_INIT_LOG(INFO, "CAP: %#x", cap);
> > -   PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
> > +   PMD_INIT_LOG(INFO, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
> >
> 
> This seems to be getting out of hand. I assume it is done like this
> (instead of multiple print lines) to prevent line breaks; if so, what
> do you think of adding a new macro that doesn't append '\n'
> automatically and refactoring this code (in a separate patch)?

Yeah, that's a good point, thanks!
I will add it to my to-do list and send a patch at a suitable time.
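
One possible shape for that refactor, as an alternative to a no-newline
macro: build the capability string once and log it once. This sketch
assumes the driver's existing PMD_INIT_LOG macro and NFP_NET_CFG_CTRL_*
capability bits; only two flags are shown.

#include <stdint.h>
#include <rte_string_fns.h>
/* driver-local headers assumed: nfp_logs.h (PMD_INIT_LOG) and
 * nfp_net_ctrl.h (NFP_NET_CFG_CTRL_*) */

static void
nfp_net_log_cap_flags(uint32_t cap)
{
	char caps[512] = "";

	/* one append per capability flag; the rest follow the pattern */
	if (cap & NFP_NET_CFG_CTRL_PROMISC)
		rte_strlcat(caps, "PROMISC ", sizeof(caps));
	if (cap & NFP_NET_CFG_CTRL_L2BC)
		rte_strlcat(caps, "L2BCFILT ", sizeof(caps));

	PMD_INIT_LOG(INFO, "CAP: %#x %s", cap, caps);
}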