[PATCH] power: use hugepage memory for queue list entry structure

2024-12-19 Thread Huisong Li
The queue_list_entry structure data is used in rx_callback of io path
when enable PMD Power Management. However its memory is currently from
normal heap memory. For better performance, use hugepage memory to
replace it.

Signed-off-by: Huisong Li 
---
 lib/power/rte_power_pmd_mgmt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index a2fff3b765..c7bf57a910 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -97,7 +97,7 @@ queue_list_find(const struct pmd_core_cfg *cfg, const union 
queue *q)
 }
 
 static int
-queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
+queue_list_add(struct pmd_core_cfg *cfg, const union queue *q, unsigned int 
lcore_id)
 {
struct queue_list_entry *qle;
 
@@ -105,10 +105,10 @@ queue_list_add(struct pmd_core_cfg *cfg, const union 
queue *q)
if (queue_list_find(cfg, q) != NULL)
return -EEXIST;
 
-   qle = malloc(sizeof(*qle));
+   qle = rte_zmalloc_socket(NULL, sizeof(*qle), RTE_CACHE_LINE_SIZE,
+rte_lcore_to_socket_id(lcore_id));
if (qle == NULL)
return -ENOMEM;
-   memset(qle, 0, sizeof(*qle));
 
queue_copy(&qle->queue, q);
TAILQ_INSERT_TAIL(&cfg->head, qle, next);
@@ -570,7 +570,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, 
uint16_t port_id,
goto end;
}
/* add this queue to the list */
-   ret = queue_list_add(lcore_cfg, &qdata);
+   ret = queue_list_add(lcore_cfg, &qdata, lcore_id);
if (ret < 0) {
POWER_LOG(DEBUG, "Failed to add queue to list: %s",
strerror(-ret));
@@ -664,7 +664,7 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
 * callbacks can be freed. we're intentionally casting away const-ness.
 */
rte_free((void *)(uintptr_t)queue_cfg->cb);
-   free(queue_cfg);
+   rte_free(queue_cfg);
 
return 0;
 }
-- 
2.22.0



Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread David Marchand
On Thu, Dec 19, 2024 at 7:38 AM Yunjian Wang  wrote:
>
> The hdr->csum_start does two successive reads from user space to read a
> variable length data structure. The result overflow if the data structure
> changes between the two reads.
>
> To fix this, we can prevent double fetch issue by copying virtio_hdr to
> the temporary variable.
>
> Fixes: 4dc4e33ffa10 ("net/virtio: fix Rx checksum calculation")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Yunjian Wang 
> ---
>  lib/vhost/virtio_net.c | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 69901ab3b5..5c40ae7069 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -2914,10 +2914,12 @@ desc_to_mbuf(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>  * in a contiguous virtual area.
>  */
> copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
> -   hdr = &tmp_hdr;
> } else {
> -   hdr = (struct virtio_net_hdr 
> *)((uintptr_t)buf_vec[0].buf_addr);
> +   rte_memcpy((void *)(uintptr_t)&tmp_hdr,
> +   (void *)(uintptr_t)buf_vec[0].buf_addr,
> +   sizeof(struct virtio_net_hdr));
> }
> +   hdr = &tmp_hdr;
> }

This will need some benchmark, as I remember putting rte_memcpy in
inlined helpers had some performance impact.

Instead, I would call copy_vnet_hdr_from_desc unconditionnally, and
store in a struct virtio_net_hdr hdr variable (+ a has_vnet_hdr
boolean to indicate validity).
Something like:
if (virtio_net_with_host_offload(dev)) {
-   if (unlikely(buf_vec[0].buf_len < sizeof(struct
virtio_net_hdr))) {
-   /*
-* No luck, the virtio-net header doesn't fit
-* in a contiguous virtual area.
-*/
-   copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
-   hdr = &tmp_hdr;
-   } else {
-   hdr = (struct virtio_net_hdr
*)((uintptr_t)buf_vec[0].buf_addr);
-   }
+   copy_vnet_hdr_from_desc(&hdr, buf_vec);
+   has_vnet_hdr = true;
}

(besides, in copy_vnet_hdr_from_desc, the while (cond) {} loop could
be changed to do a do {} while (cond), and that approach requires
performance numbers too)


>
> for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
> @@ -3363,7 +3365,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
>  {
> uint16_t avail_idx = vq->last_avail_idx;
> uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> -   struct virtio_net_hdr *hdr;
> +   struct virtio_net_hdr hdr;
> uintptr_t desc_addrs[PACKED_BATCH_SIZE];
> uint16_t ids[PACKED_BATCH_SIZE];
> uint16_t i;
> @@ -3382,8 +3384,9 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
>
> if (virtio_net_with_host_offload(dev)) {
> vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> -   hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
> -   vhost_dequeue_offload(dev, hdr, pkts[i], 
> legacy_ol_flags);
> +   rte_memcpy((void *)(uintptr_t)&hdr,
> +   (void *)(uintptr_t)desc_addrs[i], 
> sizeof(struct virtio_net_hdr));
> +   vhost_dequeue_offload(dev, &hdr, pkts[i], 
> legacy_ol_flags);
> }
> }

Here too, there may be an impact with adding rte_memcpy.
Just do a copy like:

if (virtio_net_with_host_offload(dev)) {
+   struct virtio_net_hdr hdr;
+
vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
-   hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
-   vhost_dequeue_offload(dev, hdr, pkts[i],
legacy_ol_flags);
+   hdr = *(struct virtio_net_hdr *)(desc_addrs[i]);
+   vhost_dequeue_offload(dev, &hdr, pkts[i],
legacy_ol_flags);
}


-- 
David Marchand



Re: rte_event_eth_tx_adapter_enqueue() short enqueue

2024-12-19 Thread Bruce Richardson
On Thu, Dec 19, 2024 at 04:59:33PM +0100, Morten Brørup wrote:
> > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > Sent: Wednesday, 27 November 2024 12.07
> > 
> > On Wed, Nov 27, 2024 at 11:53:50AM +0100, Mattias Rönnblom wrote:
> > > On 2024-11-27 11:38, Bruce Richardson wrote:
> > > > On Wed, Nov 27, 2024 at 11:03:31AM +0100, Mattias Rönnblom wrote:
> > > > > Hi.
> > > > >
> > > > > Consider the following situation:
> > > > >
> > > > > An application does
> > > > >
> > > > > rte_event_eth_tx_adapter_enqueue()
> > > > >
> > > > > and due to back-pressure or some other reason not all
> > events/packets could
> > > > > be enqueued, and a count lower than the nb_events input parameter
> > is
> > > > > returned.
> > > > >
> > > > > The API says that "/../ the remaining events at the end of ev[]
> > are not
> > > > > consumed and the caller has to take care of them /../".
> > > > >
> > > > > May an event device rearrange the ev array so that any enqueue
> > failures are
> > > > > put last in the ev array?
> > > > >
> > > > > In other words: does the "at the end of ev[]" mean "at the end of
> > ev[] as
> > > > > the call has completed", or is the event array supposed to be
> > untouched, and
> > > > > thus the same events are at the end both before and after the
> > call.
> > > > >
> > > > > The ev array pointer is not const, so from that perspective it
> > may be
> > > > > modified.
> > > > >
> > > > > This situation may occur for example the bonding driver is used
> > under the
> > > > > hood. The bonding driver does this kind of rearrangements on the
> > ethdev
> > > > > level.
> > > > >
> > > >
> > > > Interesting question. I tend to think that we should not proclude
> > this
> > > > reordering, as it should allow e.g  an eventdev which is short on
> > space to
> > > > selectively enqueue only the high priority events.
> > > >
> > >
> > > Allowing reordering may be a little surprising to the user. At least
> > it
> > > would be for me.
> > >
> > > Other eventdev APIs enqueue do not allow this kind of reordering
> > (with
> > > const-marked arrays).
> > >
> > 
> > That is a good point. I forgot that the events are directly passed to
> > the
> > enqueue functions rather than being passed as pointers, which could
> > then be
> > reordered without modifying the underlying events.
> > 
> > > That said, I lean toward agreeing with you, since it will solve the
> > ethdev
> > > tx_burst() mapping issue mentioned.
> > >
> > 
> > If enabling this solves a real problem, then let's allow it, despite
> > the
> > inconsistency in the APIs. Again, though, we need to to call this out
> > in
> > the docs very prominently to avoid surprises.
> > 
> > Alternatively, do we want to add a separate API that explicitly allows
> > reordering, and update the existing API to have a const value
> > parameter?
> > For drivers that don't implement the reordering they can just not
> > provide
> > the reordering function and the non-reorder version can be used
> > transparently instead.
> 
> IMHO, allowing reordering with the current API would break the developer's 
> reasonable expectations of the API.
> Breaking reasonable expectations could be considered an API break.
> 
> Some application may have a parallel array with metadata about the events.
> If the events are reordered (and the last N of them deferred to the 
> application to process), the application can no longer index into the 
> metadata array (to process the metadata of the deferred events).
> 
> For reference, consider the SORING proposed by Konstantin.
> 
> Regarding "const":
> It's my impression that "const" is missing in lots of APIs where the 
> parameter must not be modified.

+1 to this.
Fortunately, if we find ones where it is missing, it's not an ABI or API
break to add in the missing const clarification. Therefore, let's add these
consts in whenever we spot them missing!

> So, developers cannot rely on "const" as an indication if a passed parameter 
> might be modified or not.
> Obviously, "const" cannot be modified. But no "const" does not imply that the 
> parameter is contractually modifiable by the function.
> 

Or more specifically, the develop cannot derive any information from the
absence of const in our APIs - the parameters might be modified, but then
again they may not.

Sometimes this causes problems, where application code wants to have
const-correctness but is blocked by a DPDK function where it logically
should not modify parameters e.g. a configure function, but is not
explicitly committing via const not to.

/Bruce


Re: [PATCH v2] cryptodev: fix C++ include

2024-12-19 Thread Mattias Rönnblom

On 2024-12-19 14:30, Thomas Monjalon wrote:

Some cryptodev functions were not included in an extern "C" block.

There are 2 blocks, the second one being fast path inline functions,
preceded with an include of the required rte_cryptodev_core.h file.

Fixes: 719834a6849e ("use C linkage where appropriate in headers")
Cc: sta...@dpdk.org

Reported-by: Zhigang Hu 
Signed-off-by: Thomas Monjalon 
---
v2: keep rte_cryptodev_core.h include at the same place
---
  .mailmap  |  1 +
  lib/cryptodev/rte_cryptodev.h | 12 ++--
  2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 2bf38f9e8c..1e4bb06d6e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1794,6 +1794,7 @@ Zhenghua Zhou 
  Zhenning Xiao 
  Zhe Tao 
  Zhichao Zeng 
+Zhigang Hu 
  Zhigang Lu 
  Zhiguang He 
  Zhihong Peng 
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index c64d2f83a0..071ff3dbdf 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -22,6 +22,10 @@
  
  #include "rte_cryptodev_trace_fp.h"
  
+#ifdef __cplusplus

+extern "C" {
+#endif
+
  /**
   * @internal Logtype used for cryptodev related messages.
   */
@@ -1928,11 +1932,16 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
  uint16_t qp_id,
  struct rte_cryptodev_cb *cb);
  
-#include 

+#ifdef __cplusplus
+}
+#endif
+
+#include "rte_cryptodev_core.h"
  
  #ifdef __cplusplus

  extern "C" {
  #endif
+
  /**
   *
   * Dequeue a burst of processed crypto operations from a queue on the crypto
@@ -2125,7 +2134,6 @@ rte_cryptodev_qp_depth_used(uint8_t dev_id, uint16_t 
qp_id)
return rc;
  }
  
-

  #ifdef __cplusplus
  }
  #endif


Reviewed-by: Mattias Rönnblom 



Re: [PATCH v2] cryptodev: fix C++ include

2024-12-19 Thread Mattias Rönnblom

On 2024-12-19 16:37, David Marchand wrote:

On Thu, Dec 19, 2024 at 2:31 PM Thomas Monjalon  wrote:


Some cryptodev functions were not included in an extern "C" block.

There are 2 blocks, the second one being fast path inline functions,
preceded with an include of the required rte_cryptodev_core.h file.

Fixes: 719834a6849e ("use C linkage where appropriate in headers")
Cc: sta...@dpdk.org

Reported-by: Zhigang Hu 
Signed-off-by: Thomas Monjalon 


I wonder if there is also an issue with
lib/cryptodev/rte_crypto_asym.h, for symbols
rte_crypto_asym_ke_strings and rte_crypto_asym_op_strings.


+1


But at least this patch looks fine to me.

Reviewed-by: David Marchand 






[PATCH] cryptodev: allow use of global variables from C++

2024-12-19 Thread Mattias Rönnblom
Avoid C++ name mangling of the two global variables being exported
from .

Suggested-by: David Marchand 
Signed-off-by: Mattias Rönnblom 
---
 lib/cryptodev/rte_crypto_asym.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index aeb46e688e..9787b710e7 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -25,6 +25,10 @@
 
 struct rte_cryptodev_asym_session;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** asym key exchange operation type name strings */
 extern const char *
 rte_crypto_asym_ke_strings[];
@@ -33,6 +37,10 @@ rte_crypto_asym_ke_strings[];
 extern const char *
 rte_crypto_asym_op_strings[];
 
+#ifdef __cplusplus
+}
+#endif
+
 #define RTE_CRYPTO_ASYM_FLAG_PUB_KEY_NO_PADDINGRTE_BIT32(0)
 /**<
  * Flag to denote public key will be returned without leading zero bytes
-- 
2.43.0



[RFC v2 1/1] eventdev: add atomic queue to test-eventdev app

2024-12-19 Thread Luka Jankovic
From 2e55ecd0e522f50cbb3635f53b025e165db7cf3e Mon Sep 17 00:00:00 2001
In-Reply-To: <228d44a6f2f1f6a4fb5519d9a91c99973f8d7352.ca...@ericsson.com>
References: <228d44a6f2f1f6a4fb5519d9a91c99973f8d7352.ca...@ericsson.com>
From: Luka Jankovic 
Date: Thu, 19 Dec 2024 13:31:26 +
Subject: [RFC v2 1/1] eventdev: add atomic queue to test-eventdev app
To: luka.janko...@ericsson.com
Cc: dev@dpdk.org,
mattias.ronnb...@ericsson.com

Add an atomic queue test based on the order queue test but use exclusively 
atomic queues.
This makes it compatible with event devices such as the distributed software 
eventdev.

The test detects whether port maintenance is required.

To verify atomicity, a spinlock is set up for each combination of queue and 
flow.
It is taken whenever an event is dequeued for processing and released when 
processing is finished.
The test will fail if a port attempts to take a lock which is already taken.

Signed-off-by: Luka Jankovic 
---
v2:
 * Changed to only check queue, flow combination, not port, queue, flow.
 * Lock is only held when a packet is processed.
 * Utilize event u64 instead of mbuf.
 * General cleanup.
---
 app/test-eventdev/evt_common.h|  10 +
 app/test-eventdev/meson.build |   1 +
 app/test-eventdev/test_atomic_queue.c | 372 ++
 3 files changed, 383 insertions(+)
 create mode 100644 app/test-eventdev/test_atomic_queue.c

diff --git a/app/test-eventdev/evt_common.h b/app/test-eventdev/evt_common.h
index 901b8ba55d..adb024c011 100644
--- a/app/test-eventdev/evt_common.h
+++ b/app/test-eventdev/evt_common.h
@@ -138,6 +138,16 @@ evt_has_flow_id(uint8_t dev_id)
true : false;
 }
 
+static inline bool
+evt_is_maintenance_free(uint8_t dev_id)
+{
+   struct rte_event_dev_info dev_info;
+
+   rte_event_dev_info_get(dev_id, &dev_info);
+   return (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_MAINTENANCE_FREE) ?
+   true : false;
+}
+
 static inline int
 evt_service_setup(uint32_t service_id)
 {
diff --git a/app/test-eventdev/meson.build b/app/test-eventdev/meson.build
index ab8769c755..db5add39eb 100644
--- a/app/test-eventdev/meson.build
+++ b/app/test-eventdev/meson.build
@@ -15,6 +15,7 @@ sources = files(
 'test_order_atq.c',
 'test_order_common.c',
 'test_order_queue.c',
+'test_atomic_queue.c',
 'test_perf_atq.c',
 'test_perf_common.c',
 'test_perf_queue.c',
diff --git a/app/test-eventdev/test_atomic_queue.c 
b/app/test-eventdev/test_atomic_queue.c
new file mode 100644
index 00..51e988c527
--- /dev/null
+++ b/app/test-eventdev/test_atomic_queue.c
@@ -0,0 +1,372 @@
+#include 
+#include 
+#include 
+
+#include "test_order_common.h"
+
+#define NB_QUEUES 2
+
+static rte_spinlock_t *atomic_locks;
+
+static inline uint64_t
+event_data_create(flow_id_t flow, uint32_t seq)
+{
+   return ((uint64_t)flow << 32) | seq;
+}
+
+static inline uint32_t
+event_data_get_seq(struct rte_event *const ev)
+{
+   return ev->u64 & 0x;
+}
+
+static inline uint32_t
+event_data_get_flow(struct rte_event *const ev)
+{
+   return ev->u64 >> 32;
+}
+
+static inline uint32_t
+get_lock_idx(int queue, flow_id_t flow, uint32_t nb_flows)
+{
+   return (queue * nb_flows) + flow;
+}
+
+static inline bool
+test_done(struct test_order *const t)
+{
+   return t->err || t->result == EVT_TEST_SUCCESS;
+}
+
+static inline int
+atomic_producer(void *arg)
+{
+   struct prod_data *p = arg;
+   struct test_order *t = p->t;
+   struct evt_options *opt = t->opt;
+   const uint8_t dev_id = p->dev_id;
+   const uint8_t port = p->port_id;
+   const uint64_t nb_pkts = t->nb_pkts;
+   uint32_t *producer_flow_seq = t->producer_flow_seq;
+   const uint32_t nb_flows = t->nb_flows;
+   uint64_t count = 0;
+   struct rte_event ev;
+
+   if (opt->verbose_level > 1)
+   printf("%s(): lcore %d dev_id %d port=%d queue=%d\n", __func__, 
rte_lcore_id(),
+   dev_id, port, p->queue_id);
+
+   ev = (struct rte_event){
+   .event = 0,
+   .op = RTE_EVENT_OP_NEW,
+   .queue_id = p->queue_id,
+   .sched_type = RTE_SCHED_TYPE_ORDERED,
+   .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+   .event_type = RTE_EVENT_TYPE_CPU,
+   .sub_event_type = 0 /* stage 0 */
+   };
+
+   while (count < nb_pkts && t->err == false) {
+   const flow_id_t flow = rte_rand_max(nb_flows);
+
+   /* Maintain seq number per flow */
+   ev.u64 = event_data_create(flow, producer_flow_seq[flow]++);
+   ev.flow_id = flow;
+
+   while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
+   if (t->err)
+   break;
+   rte_pause();
+   }

Re: [PATCH v2] cryptodev: fix C++ include

2024-12-19 Thread David Marchand
On Thu, Dec 19, 2024 at 2:31 PM Thomas Monjalon  wrote:
>
> Some cryptodev functions were not included in an extern "C" block.
>
> There are 2 blocks, the second one being fast path inline functions,
> preceded with an include of the required rte_cryptodev_core.h file.
>
> Fixes: 719834a6849e ("use C linkage where appropriate in headers")
> Cc: sta...@dpdk.org
>
> Reported-by: Zhigang Hu 
> Signed-off-by: Thomas Monjalon 

I wonder if there is also an issue with
lib/cryptodev/rte_crypto_asym.h, for symbols
rte_crypto_asym_ke_strings and rte_crypto_asym_op_strings.
But at least this patch looks fine to me.

Reviewed-by: David Marchand 


-- 
David Marchand



RE: [RFC 0/8] ioring: network driver

2024-12-19 Thread Morten Brørup
> From: Konstantin Ananyev [mailto:konstantin.anan...@huawei.com]
> 
> > > > This is first draft of new simplified TAP device that uses
> > > > the Linux kernel ioring API to provide a read/write ring
> > > > with kernel.
> > > >
> > > > This is split from tap device because there are so many
> > > > unnecessary things in existing tap, and supporting ioring is
> > > > better without ifdefs etc. The default name of the tap
> > > > device is different that other uses in DPDK but the driver
> > > > tries to keep the same relevant devargs as before.
> > > >
> > > > This driver will only provide features that match what kernel
> > > > does, so no flow support etc. The next version will add checksum
> > > > and multi-segment packets. Some of the doc files may need update
> > > > as well.
> > >
> > > Makes sense to me, though didn't properly look inside.
> > > One thing - probably add  a 'tap' into the name,
> > > 'tap_ioiring' or so, otherwise 'ioring' is a bit too generic
> > > and might be confusing.

Konstantin is referring to the name of the driver and the source code file 
names, "net/ioring" -> "net/tap_ioring".

> >
> > There are some userspaces that look for "e*" in name for some setups.

Stephen is referring to the device name of an instantiated interface, e.g. 
"eth0".

And yes, assuming devices named "e*" are Ethernet devices is a common hack in 
Linux applications. I've done it myself. :-)

> 
> Didn't get you here, pls try to re-phrase.
> 
> > But names are totally arbitrary




[PATCH v2] net/mlx5/hws: fix fragmented ptype match

2024-12-19 Thread Alexander Kozyrev
Fragmented PTYPE matching requires setting the mask to the exact
RTE_PTYPE_L4_FRAG value to avoid conflicts with other L4 types.
Adding L2 or L3 types to the same mask should be allowed,
but there is a check for the exact value for setting the definer.
This prevents the fragmented packets from matching in case of L2/L3
mask is provided as well. Mask out L2/L3 types when setting L4_FRAG.

Fixes: 761439a20f net/mlx5/hws: support fragmented packet type matching
Cc: sta...@dpdk.org

Signed-off-by: Alexander Kozyrev 
---
 drivers/net/mlx5/hws/mlx5dr_definer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c 
b/drivers/net/mlx5/hws/mlx5dr_definer.c
index e6d3dbfa46..837e0c47bd 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -2205,7 +2205,7 @@ mlx5dr_definer_conv_item_ptype(struct 
mlx5dr_definer_conv_data *cd,
 * Cannot be combined with Layer 4 Types (TCP/UDP).
 * The exact value must be specified in the mask.
 */
-   if (m->packet_type == RTE_PTYPE_L4_FRAG) {
+   if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) {
fc = &cd->fc[DR_CALC_FNAME(PTYPE_FRAG, false)];
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_ptype_frag_set;
@@ -2227,7 +2227,7 @@ mlx5dr_definer_conv_item_ptype(struct 
mlx5dr_definer_conv_data *cd,
}
 
if (m->packet_type & RTE_PTYPE_INNER_L4_MASK) {
-   if (m->packet_type == RTE_PTYPE_INNER_L4_FRAG) {
+   if ((m->packet_type & RTE_PTYPE_INNER_L4_MASK) == 
RTE_PTYPE_INNER_L4_FRAG) {
fc = &cd->fc[DR_CALC_FNAME(PTYPE_FRAG, true)];
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_ptype_frag_set;
-- 
2.43.5



Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread Stephen Hemminger
On Thu, 19 Dec 2024 14:38:28 +0800
Yunjian Wang  wrote:

> - hdr = (struct virtio_net_hdr 
> *)((uintptr_t)buf_vec[0].buf_addr);
> + rte_memcpy((void *)(uintptr_t)&tmp_hdr,
> + (void *)(uintptr_t)buf_vec[0].buf_addr,
> + sizeof(struct virtio_net_hdr));
>   }

Do not introduce more rte_memcpy of a fixed size.
You don't need that many casts!
Why can you not use a structure assignment here.


[PATCH] cryptodev: fix C++ include

2024-12-19 Thread Thomas Monjalon
Some cryptodev functions were not included in the extern "C" block,
so it is moved to start before.

An include is also moved to avoid being part of this block.

Fixes: 719834a6849e ("use C linkage where appropriate in headers")
Cc: sta...@dpdk.org

Reported-by: Zhigang Hu 
Signed-off-by: Thomas Monjalon 
---
 .mailmap  |  1 +
 lib/cryptodev/rte_cryptodev.h | 10 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.mailmap b/.mailmap
index 2bf38f9e8c..1e4bb06d6e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1794,6 +1794,7 @@ Zhenghua Zhou 
 Zhenning Xiao 
 Zhe Tao 
 Zhichao Zeng 
+Zhigang Hu 
 Zhigang Lu 
 Zhiguang He 
 Zhihong Peng 
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index c64d2f83a0..31a7edb189 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -21,6 +21,11 @@
 #include 
 
 #include "rte_cryptodev_trace_fp.h"
+#include "rte_cryptodev_core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 /**
  * @internal Logtype used for cryptodev related messages.
@@ -1928,11 +1933,6 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
  uint16_t qp_id,
  struct rte_cryptodev_cb *cb);
 
-#include 
-
-#ifdef __cplusplus
-extern "C" {
-#endif
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
-- 
2.47.1



RE: rte_event_eth_tx_adapter_enqueue() short enqueue

2024-12-19 Thread Morten Brørup
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Wednesday, 27 November 2024 12.07
> 
> On Wed, Nov 27, 2024 at 11:53:50AM +0100, Mattias Rönnblom wrote:
> > On 2024-11-27 11:38, Bruce Richardson wrote:
> > > On Wed, Nov 27, 2024 at 11:03:31AM +0100, Mattias Rönnblom wrote:
> > > > Hi.
> > > >
> > > > Consider the following situation:
> > > >
> > > > An application does
> > > >
> > > > rte_event_eth_tx_adapter_enqueue()
> > > >
> > > > and due to back-pressure or some other reason not all
> events/packets could
> > > > be enqueued, and a count lower than the nb_events input parameter
> is
> > > > returned.
> > > >
> > > > The API says that "/../ the remaining events at the end of ev[]
> are not
> > > > consumed and the caller has to take care of them /../".
> > > >
> > > > May an event device rearrange the ev array so that any enqueue
> failures are
> > > > put last in the ev array?
> > > >
> > > > In other words: does the "at the end of ev[]" mean "at the end of
> ev[] as
> > > > the call has completed", or is the event array supposed to be
> untouched, and
> > > > thus the same events are at the end both before and after the
> call.
> > > >
> > > > The ev array pointer is not const, so from that perspective it
> may be
> > > > modified.
> > > >
> > > > This situation may occur for example the bonding driver is used
> under the
> > > > hood. The bonding driver does this kind of rearrangements on the
> ethdev
> > > > level.
> > > >
> > >
> > > Interesting question. I tend to think that we should not proclude
> this
> > > reordering, as it should allow e.g  an eventdev which is short on
> space to
> > > selectively enqueue only the high priority events.
> > >
> >
> > Allowing reordering may be a little surprising to the user. At least
> it
> > would be for me.
> >
> > Other eventdev APIs enqueue do not allow this kind of reordering
> (with
> > const-marked arrays).
> >
> 
> That is a good point. I forgot that the events are directly passed to
> the
> enqueue functions rather than being passed as pointers, which could
> then be
> reordered without modifying the underlying events.
> 
> > That said, I lean toward agreeing with you, since it will solve the
> ethdev
> > tx_burst() mapping issue mentioned.
> >
> 
> If enabling this solves a real problem, then let's allow it, despite
> the
> inconsistency in the APIs. Again, though, we need to to call this out
> in
> the docs very prominently to avoid surprises.
> 
> Alternatively, do we want to add a separate API that explicitly allows
> reordering, and update the existing API to have a const value
> parameter?
> For drivers that don't implement the reordering they can just not
> provide
> the reordering function and the non-reorder version can be used
> transparently instead.

IMHO, allowing reordering with the current API would break the developer's 
reasonable expectations of the API.
Breaking reasonable expectations could be considered an API break.

Some application may have a parallel array with metadata about the events.
If the events are reordered (and the last N of them deferred to the application 
to process), the application can no longer index into the metadata array (to 
process the metadata of the deferred events).

For reference, consider the SORING proposed by Konstantin.

Regarding "const":
It's my impression that "const" is missing in lots of APIs where the parameter 
must not be modified.
So, developers cannot rely on "const" as an indication if a passed parameter 
might be modified or not.
Obviously, "const" cannot be modified. But no "const" does not imply that the 
parameter is contractually modifiable by the function.



[PATCH] net/mlx5: fix fragmented ptype match

2024-12-19 Thread Alexander Kozyrev
Fragmented PTYPE macthing requires the setting the mask to the exact
RTE_PTYPE_L4_FRAG value to avoid conflicts with other L4 types.
Adding L2 or L3 types to the same mask should be allowed,
but there is a check for the exact value for setting the definer.
This prevents the fragmented packets from matching in case of L2/L3
mask is provided as well. Mask out L2/L3 types when setting L4_FRAG.

Fixes: 761439a20f net/mlx5/hws: support fragmented packet type matching
Cc: sta...@dpdk.org

Signed-off-by: Alexander Kozyrev 
---
 drivers/net/mlx5/hws/mlx5dr_definer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c 
b/drivers/net/mlx5/hws/mlx5dr_definer.c
index e6d3dbfa46..837e0c47bd 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -2205,7 +2205,7 @@ mlx5dr_definer_conv_item_ptype(struct 
mlx5dr_definer_conv_data *cd,
 * Cannot be combined with Layer 4 Types (TCP/UDP).
 * The exact value must be specified in the mask.
 */
-   if (m->packet_type == RTE_PTYPE_L4_FRAG) {
+   if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) {
fc = &cd->fc[DR_CALC_FNAME(PTYPE_FRAG, false)];
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_ptype_frag_set;
@@ -2227,7 +2227,7 @@ mlx5dr_definer_conv_item_ptype(struct 
mlx5dr_definer_conv_data *cd,
}
 
if (m->packet_type & RTE_PTYPE_INNER_L4_MASK) {
-   if (m->packet_type == RTE_PTYPE_INNER_L4_FRAG) {
+   if ((m->packet_type & RTE_PTYPE_INNER_L4_MASK) == 
RTE_PTYPE_INNER_L4_FRAG) {
fc = &cd->fc[DR_CALC_FNAME(PTYPE_FRAG, true)];
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_ptype_frag_set;
-- 
2.43.5



Re: [RFC] eventdev: add atomic queue to test-eventdev app

2024-12-19 Thread Luka Jankovic
On Tue, 2024-12-10 at 11:37 +0100, Mattias Rönnblom wrote:
>
> > +{
> > +   struct rte_event_dev_info dev_info;
> > +
> > +   rte_event_dev_info_get(dev_id, &dev_info);
> > +   return (dev_info.event_dev_cap & 
> > RTE_EVENT_DEV_CAP_MAINTENANCE_FREE) ?
> > +   true : false;
>
> return dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_MAINTENANCE_FREE;
>
> will work fine.
>
>
I decided against it in order to maintain consistent styling with similar 
functions in the file.

>
> > +static int
> > +worker_wrapper(void *arg)
>
> Delete "wrapper".

All other eventdev-tests name their equivalent functions "worker_wrapper", so I 
picked it to be consistent with the other tests.

>
> > +
> > +   /* setup one port per worker, linking to all queues */
> > +   ret = order_event_dev_port_setup(test, opt, nb_workers, NB_QUEUES);
>
> "order"?

This function is declared in test_order_common.h and is used in all tests. It 
is not specific for "ordered" ports, so I thought it was OK to use.


> > +
> > +static void
> > +atomic_queue_opt_dump(struct evt_options *opt)
> > +{
> > +   order_opt_dump(opt);
>
> "order"?

Same thing here.

Thank you for the feedback. I will re-implement the test by not checking 
port-flow-queue combination and generally clean-up the code based on your 
comments.



[PATCH v2] cryptodev: fix C++ include

2024-12-19 Thread Thomas Monjalon
Some cryptodev functions were not included in an extern "C" block.

There are 2 blocks, the second one being fast path inline functions,
preceded with an include of the required rte_cryptodev_core.h file.

Fixes: 719834a6849e ("use C linkage where appropriate in headers")
Cc: sta...@dpdk.org

Reported-by: Zhigang Hu 
Signed-off-by: Thomas Monjalon 
---
v2: keep rte_cryptodev_core.h include at the same place
---
 .mailmap  |  1 +
 lib/cryptodev/rte_cryptodev.h | 12 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 2bf38f9e8c..1e4bb06d6e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1794,6 +1794,7 @@ Zhenghua Zhou 
 Zhenning Xiao 
 Zhe Tao 
 Zhichao Zeng 
+Zhigang Hu 
 Zhigang Lu 
 Zhiguang He 
 Zhihong Peng 
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index c64d2f83a0..071ff3dbdf 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -22,6 +22,10 @@
 
 #include "rte_cryptodev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal Logtype used for cryptodev related messages.
  */
@@ -1928,11 +1932,16 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
  uint16_t qp_id,
  struct rte_cryptodev_cb *cb);
 
-#include 
+#ifdef __cplusplus
+}
+#endif
+
+#include "rte_cryptodev_core.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
+
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
@@ -2125,7 +2134,6 @@ rte_cryptodev_qp_depth_used(uint8_t dev_id, uint16_t 
qp_id)
return rc;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.47.1



RE: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread Wangyunjian(wangyunjian,TongTu)

> -Original Message-
> From: David Marchand [mailto:david.march...@redhat.com]
> Sent: Thursday, December 19, 2024 4:24 PM
> To: Wangyunjian(wangyunjian,TongTu) ;
> maxime.coque...@redhat.com
> Cc: dev@dpdk.org; chen...@nvidia.com; Lilijun (Jerry)
> ; xiawei (H) ;
> wangzengyuan ; sta...@dpdk.org
> Subject: Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading
> 
> On Thu, Dec 19, 2024 at 7:38 AM Yunjian Wang 
> wrote:
> >
> > The hdr->csum_start does two successive reads from user space to read a
> > variable length data structure. The result overflow if the data structure
> > changes between the two reads.
> >
> > To fix this, we can prevent double fetch issue by copying virtio_hdr to
> > the temporary variable.
> >
> > Fixes: 4dc4e33ffa10 ("net/virtio: fix Rx checksum calculation")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Yunjian Wang 
> > ---
> >  lib/vhost/virtio_net.c | 13 -
> >  1 file changed, 8 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> > index 69901ab3b5..5c40ae7069 100644
> > --- a/lib/vhost/virtio_net.c
> > +++ b/lib/vhost/virtio_net.c
> > @@ -2914,10 +2914,12 @@ desc_to_mbuf(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
> >  * in a contiguous virtual area.
> >  */
> > copy_vnet_hdr_from_desc(&tmp_hdr,
> buf_vec);
> > -   hdr = &tmp_hdr;
> > } else {
> > -   hdr = (struct virtio_net_hdr
> *)((uintptr_t)buf_vec[0].buf_addr);
> > +   rte_memcpy((void *)(uintptr_t)&tmp_hdr,
> > +   (void
> *)(uintptr_t)buf_vec[0].buf_addr,
> > +   sizeof(struct virtio_net_hdr));
> > }
> > +   hdr = &tmp_hdr;
> > }
> 
> This will need some benchmark, as I remember putting rte_memcpy in
> inlined helpers had some performance impact.
> 
> Instead, I would call copy_vnet_hdr_from_desc unconditionnally, and
> store in a struct virtio_net_hdr hdr variable (+ a has_vnet_hdr
> boolean to indicate validity).
> Something like:
> if (virtio_net_with_host_offload(dev)) {
> -   if (unlikely(buf_vec[0].buf_len < sizeof(struct
> virtio_net_hdr))) {
> -   /*
> -* No luck, the virtio-net header doesn't fit
> -* in a contiguous virtual area.
> -*/
> -   copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
> -   hdr = &tmp_hdr;
> -   } else {
> -   hdr = (struct virtio_net_hdr
> *)((uintptr_t)buf_vec[0].buf_addr);
> -   }
> +   copy_vnet_hdr_from_desc(&hdr, buf_vec);
> +   has_vnet_hdr = true;
> }
> 
> (besides, in copy_vnet_hdr_from_desc, the while (cond) {} loop could
> be changed to do a do {} while (cond), and that approach requires
> performance numbers too)

How about this?
@@ -2904,8 +2904,8 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
uint32_t hdr_remain = dev->vhost_hlen;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
-   struct virtio_net_hdr tmp_hdr;
-   struct virtio_net_hdr *hdr = NULL;
+   bool has_vnet_hdr = false;
+   struct virtio_net_hdr hdr;
uint16_t vec_idx;
struct vhost_async *async = vq->async;
struct async_inflight_info *pkts_info;
@@ -2921,11 +2921,11 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 * No luck, the virtio-net header doesn't fit
 * in a contiguous virtual area.
 */
-   copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
-   hdr = &tmp_hdr;
+   copy_vnet_hdr_from_desc(&hdr, buf_vec);
} else {
-   hdr = (struct virtio_net_hdr 
*)((uintptr_t)buf_vec[0].buf_addr);
+   hdr = *(struct virtio_net_hdr 
*)((uintptr_t)buf_vec[0].buf_addr);
}
+   has_vnet_hdr = true;
}


> 
> 
> >
> > for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
> > @@ -3363,7 +3365,7 @@ virtio_dev_tx_batch_packed(struct virtio_net
> *dev,
> >  {
> > uint16_t avail_idx = vq->last_avail_idx;
> > uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > -   struct virtio_net_hdr *hdr;
> > +   struct virtio_net_hdr hdr;
> > uintptr_t desc_addrs[PACKED_BATCH_SIZE];
> > uint16_t ids[PACKED_BATCH_SIZE];
> > uint16_t i;
> > @@ -3382,8 +3384,9 @@ virtio_dev_tx_batch_packed(struct virtio_net
> *dev,
> >
> > if (virtio_net_with_host_offload(dev)) {
> > vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > -   hdr = (struct virtio_net_hdr *)(desc_addrs[i])

Re: The question for the modification against (lib/cryptodev/rte_cryptodev.h) in commit 719834a6

2024-12-19 Thread Thomas Monjalon
19/12/2024 11:41, Hu, Zhigang:
> Hi, experts,
> 
> My C++ program calls some functions defined in lib/cryptodev/rte_cryptodev.h. 
> After upgrading dpdk from 23.11 to 24.11, my C++ program failed to be 
> complied.  It looks like that the cimmit 719834a6 caused my issue.

Yes, you're right.
Thanks for reporting.

Please could you try this fix?
https://patches.dpdk.org/project/dpdk/patch/20241219114555.406331-1-tho...@monjalon.net/






RE: [EXTERNAL] [PATCH] cryptodev: fix C++ include

2024-12-19 Thread Akhil Goyal
> Some cryptodev functions were not included in the extern "C" block,
> so it is moved to start before.
> 
> An include is also moved to avoid being part of this block.
> 
> Fixes: 719834a6849e ("use C linkage where appropriate in headers")
> Cc: sta...@dpdk.org
> 
> Reported-by: Zhigang Hu 
> Signed-off-by: Thomas Monjalon 
> ---
>  .mailmap  |  1 +
>  lib/cryptodev/rte_cryptodev.h | 10 +-
>  2 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/.mailmap b/.mailmap
> index 2bf38f9e8c..1e4bb06d6e 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1794,6 +1794,7 @@ Zhenghua Zhou 
>  Zhenning Xiao 
>  Zhe Tao 
>  Zhichao Zeng 
> +Zhigang Hu 
>  Zhigang Lu 
>  Zhiguang He 
>  Zhihong Peng 
> diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
> index c64d2f83a0..31a7edb189 100644
> --- a/lib/cryptodev/rte_cryptodev.h
> +++ b/lib/cryptodev/rte_cryptodev.h
> @@ -21,6 +21,11 @@
>  #include 
> 
>  #include "rte_cryptodev_trace_fp.h"
> +#include "rte_cryptodev_core.h"

Fix is ok but rte_cryptodev_core.h should not be moved up.
It is added in the middle to segregate the fast path APIs.
And it is an internal header which cannot be included by app directly.
I think the same schema is followed in ethdev as well.

> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> 
>  /**
>   * @internal Logtype used for cryptodev related messages.
> @@ -1928,11 +1933,6 @@ int rte_cryptodev_remove_deq_callback(uint8_t
> dev_id,
> uint16_t qp_id,
> struct rte_cryptodev_cb *cb);
> 
> -#include 
> -
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
>  /**
>   *
>   * Dequeue a burst of processed crypto operations from a queue on the crypto
> --
> 2.47.1



The question for the modification against (lib/cryptodev/rte_cryptodev.h) in commit 719834a6

2024-12-19 Thread Hu, Zhigang
Hi, experts,

My C++ program calls some functions defined in lib/cryptodev/rte_cryptodev.h. 
After upgrading dpdk from 23.11 to 24.11, my C++ program failed to be complied. 
 It looks like that the cimmit 719834a6 caused my issue.

It seems that the commit 719834a6 intends to exclude all head files from the
#ifdef __cplusplus
extern "C" {
#endif
..
#ifdef __cplusplus
}
#endif
Unfortunately some function declarations are also excluded wrongly since these 
declarations are before the last head file. Below picture is an example. The 
function in the red circle can't be linked by C++ complier. Would you please 
help to check this issue?

Best regards!
Hu Zhigang

[cid:bbf5bf01-e72e-42b0-8b1a-fc627fbb1dc5]


Re: [EXTERNAL] [PATCH] cryptodev: fix C++ include

2024-12-19 Thread Thomas Monjalon
19/12/2024 13:24, Akhil Goyal:
> > Some cryptodev functions were not included in the extern "C" block,
> > so it is moved to start before.
> > 
> > An include is also moved to avoid being part of this block.
[...]
> > --- a/lib/cryptodev/rte_cryptodev.h
> > +++ b/lib/cryptodev/rte_cryptodev.h
> > @@ -21,6 +21,11 @@
> >  #include 
> > 
> >  #include "rte_cryptodev_trace_fp.h"
> > +#include "rte_cryptodev_core.h"
> 
> Fix is ok but rte_cryptodev_core.h should not be moved up.
> It is added in the middle to segregate the fast path APIs.
> And it is an internal header which cannot be included by app directly.
> I think the same schema is followed in ethdev as well.

Indeed, this is how it's done in ethdev:

#ifdef __cplusplus
}
#endif

#include 

#ifdef __cplusplus
extern "C" {
#endif

I'll do the same in v2.




Re: [PATCH] net/gve: Allocate qpl pages using malloc if memzone allocation fails

2024-12-19 Thread Stephen Hemminger
On Thu, 19 Dec 2024 12:53:14 -0800
Praveen Kaligineedi  wrote:

> The TX queue requires IOVA contiguous QPL memory.  So, we still need
> memzone code for TX queues.
> 
> Regards,
> Praveen
> 
> On Wed, Dec 18, 2024 at 7:51 PM Stephen Hemminger
>  wrote:
> >
> > On Wed, 18 Dec 2024 15:46:35 -0800
> > Joshua Washington  wrote:
> >  
> > > From: Praveen Kaligineedi 
> > >
> > > Allocating QPL for an RX queue might fail if enough contiguous IOVA
> > > memory cannot be allocated. However, the only requirement for QPL
> > > for RX is that each 4K buffer be IOVA contiguous, not the entire
> > > QPL. Therefore, use malloc to allocate 4K buffers if the allocation
> > > using memzone fails.
> > >
> > > Signed-off-by: Praveen Kaligineedi 
> > > Reviewed-by: Joshua Washington 
> > > ---  
> >
> > Why keep the memzone code? rte_malloc and memzone are both coming from
> > huge pages. Is there any advantage to memzone for what you are doing?
> >
> > Better to not have two potential allocation paths to test.  

Please fix email address typo in next version. I am getting bounces
because it has
   Praveen Kaligineedi 
in Cc:
Looks like you meant:
Praveen Kaligineedi 


[PATCH] pdump: clear statistics when enabled

2024-12-19 Thread Stephen Hemminger
The pdump statistics are used to keep track of the number
of packets captured, filtered, etc. These need not be cumalative
and instead should be start counting when capture starts.

This fixes the issue where multiple invocations of dumpcap
would include counts from previous invocations.

Bugzilla ID: 1604
Fixes: 10f726efe26c ("pdump: support pcapng and filtering")
Cc: sta...@dpdk.org

Signed-off-by: Stephen Hemminger 
---
 lib/pdump/rte_pdump.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c
index 679c3dd0b5..0e6ffc167b 100644
--- a/lib/pdump/rte_pdump.c
+++ b/lib/pdump/rte_pdump.c
@@ -199,6 +199,8 @@ pdump_register_rx_callbacks(enum pdump_version ver,
rte_errno);
return rte_errno;
}
+
+   memset(&pdump_stats->rx[port][qid], 0, sizeof(struct 
rte_pdump_stats));
} else if (operation == DISABLE) {
int ret;
 
@@ -257,6 +259,7 @@ pdump_register_tx_callbacks(enum pdump_version ver,
rte_errno);
return rte_errno;
}
+   memset(&pdump_stats->tx[port][qid], 0, sizeof(struct 
rte_pdump_stats));
} else if (operation == DISABLE) {
int ret;
 
-- 
2.45.2



Re: [PATCH] net/gve: Allocate qpl pages using malloc if memzone allocation fails

2024-12-19 Thread Stephen Hemminger
On Thu, 19 Dec 2024 12:53:14 -0800
Praveen Kaligineedi  wrote:

> The TX queue requires IOVA contiguous QPL memory.  So, we still need
> memzone code for TX queues.
> 
> Regards,
> Praveen
> 
> On Wed, Dec 18, 2024 at 7:51 PM Stephen Hemminger
>  wrote:
> >
> > On Wed, 18 Dec 2024 15:46:35 -0800
> > Joshua Washington  wrote:
> >  
> > > From: Praveen Kaligineedi 
> > >
> > > Allocating QPL for an RX queue might fail if enough contiguous IOVA
> > > memory cannot be allocated. However, the only requirement for QPL
> > > for RX is that each 4K buffer be IOVA contiguous, not the entire
> > > QPL. Therefore, use malloc to allocate 4K buffers if the allocation
> > > using memzone fails.
> > >
> > > Signed-off-by: Praveen Kaligineedi 
> > > Reviewed-by: Joshua Washington 
> > > ---  
> >
> > Why keep the memzone code? rte_malloc and memzone are both coming from
> > huge pages. Is there any advantage to memzone for what you are doing?
> >
> > Better to not have two potential allocation paths to test.  


So then use rte_malloc for Rx queue and rte_memzone for Tx queue
and document rationale.


Re: [PATCH v4] net/zxdh: Provided zxdh basic init

2024-12-19 Thread Stephen Hemminger
On Tue, 10 Sep 2024 20:00:20 +0800
Junlong Wang  wrote:

> provided zxdh initialization of zxdh PMD driver.
> include msg channel, np init and etc.
> 
> Signed-off-by: Junlong Wang 
> ---
> V4: Resolve compilation issues
> V3: Resolve compilation issues
> V2: Resolve compilation issues and modify doc(zxdh.ini zdh.rst)
> V1: Provide zxdh basic init and open source NPSDK lib
> ---

Overall this looks good, one test checklist item for me was to build
with Gcc 14 and analyzer option. This finds bugs but can generate false
positives.  The output is quite verbose.

It complains about this which may or may not be a real problem.
If memcpy() is used instead of rte_memcpy() then the problem goes away.
The issue is that inlined version rte_memcpy() will reference past the arguments
as an internal optimization for small values.

[1564/3222] Compiling C object 
drivers/libtmp_rte_net_zxdh.a.p/net_zxdh_zxdh_common.c.o
In file included from ../lib/mempool/rte_mempool.h:50,
 from ../lib/mbuf/rte_mbuf.h:38,
 from ../lib/net/rte_ether.h:20,
 from ../lib/ethdev/rte_eth_ctrl.h:10,
 from ../lib/ethdev/rte_ethdev.h:1472,
 from ../lib/ethdev/ethdev_driver.h:21,
 from ../drivers/net/zxdh/zxdh_common.c:8:
In function ‘rte_mov15_or_less’,
inlined from ‘rte_memcpy_generic’ at 
../lib/eal/x86/include/rte_memcpy.h:395:10,
inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:757:10,
inlined from ‘zxdh_get_res_info’ at ../drivers/net/zxdh/zxdh_common.c:231:2:
../lib/eal/x86/include/rte_memcpy.h:82:55: warning: stack-based buffer overflow 
[CWE-121] [-Wanalyzer-out-of-bounds]
   82 | ((struct rte_uint64_alias *)dst)->val =
  | ~~^
   83 | ((const struct rte_uint64_alias *)src)->val;
  | ~~~
  ‘zxdh_panelid_get’: events 1-3
|
|../drivers/net/zxdh/zxdh_common.c:250:1:
|  239 | uint8_t reps = 0;
|  | 
|  | |
|  | (2) capacity: 1 byte
|..
|  250 | zxdh_panelid_get(struct rte_eth_dev *dev, uint8_t *panelid)
|  | ^~~~
|  | |
|  | (1) entry to ‘zxdh_panelid_get’
|..
|  255 | int32_t ret = zxdh_get_res_panel_id(¶m, panelid);
|  |   ~
|  |   |
|  |   (3) inlined call to ‘zxdh_get_res_panel_id’ 
from ‘zxdh_panelid_get’
|
+--> ‘zxdh_get_res_panel_id’: event 4
   |
   |  242 | if (zxdh_get_res_info(in, ZXDH_TBL_FIELD_PNLID, 
&reps, &reps_len) != ZXDH_BAR_MSG_OK)
   |  | 
^
   |  | |
   |  | (4) calling ‘zxdh_get_res_info’ from 
‘zxdh_panelid_get’
   |
 ‘zxdh_get_res_info’: events 5-12
   |
   |  186 | zxdh_get_res_info(struct zxdh_res_para *dev, uint8_t field, 
uint8_t *res, uint16_t *len)
   |  | ^
   |  | |
   |  | (5) entry to ‘zxdh_get_res_info’
   |..
   |  192 | if (!res || !dev)
   |  |~
   |  ||
   |  |(6) following ‘false’ branch...
   |..
   |  195 | struct zxdh_tbl_msg_header tbl_msg = {
   |  |~~~
   |  ||
   |  |(7) ...to here
   |..
   |  217 | if (ret != ZXDH_BAR_MSG_OK) {
   |  |~
   |  ||
   |  |(8) following ‘false’ branch (when ‘ret == 0’)...
   |..
   |  225 | if (tbl_reps->check != ZXDH_TBL_MSG_PRO_SUCCESS) {
   |  |
   |  |||
   |  ||(9) ...to here
   |  |(10) following ‘false’ branch...
   |..
   |  230 | *len = tbl_reps->len;
   |  |~
   |  ||
   |  |(11) ...to here
   |  231 | rte_memcpy(res, (recv_buf + ZXDH_REPS_HEADER_OFFSET 
+
   |  | ~
   |  | |
   |  | (12) inlined call to ‘rte_memcpy’ from 
‘zxdh_get_res_info’
   |
   +--> ‘rte_memcpy’: events 13-14
  |
  |../lib/eal/x86/include/rte_memcpy.h:754:12:
  |  754 | if (!(((uintptr_t)dst | (uintptr_t)src

[PATCH v2 2/5] net/bnxt: fix use after free

2024-12-19 Thread Stephen Hemminger
The filter cleanup loop was using STAILQ_FOREACH and rte_free
and would dereference the filter after free.

Found by build with -Dbsanitize=address,undefined

Fixes: e8fe0e067b68 ("net/bnxt: fix allocation of PF info struct")
Cc: ajit.khapa...@broadcom.com
Cc: sta...@dpdk.org

Signed-off-by: Stephen Hemminger 
---
 drivers/net/bnxt/bnxt_filter.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index 7b90ba651f..3804a5ec16 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+#ifndef STAILQ_FOREACH_SAFE
+#defineSTAILQ_FOREACH_SAFE(var, head, field, tvar) 
\
+   for ((var) = STAILQ_FIRST((head));  \
+   (var) && ((tvar) = STAILQ_NEXT((var), field), 1);   \
+   (var) = (tvar))
+#endif
+
 #include "bnxt.h"
 #include "bnxt_filter.h"
 #include "bnxt_hwrm.h"
@@ -151,7 +158,9 @@ void bnxt_free_filter_mem(struct bnxt *bp)
bp->filter_info = NULL;
 
for (i = 0; i < bp->pf->max_vfs; i++) {
-   STAILQ_FOREACH(filter, &bp->pf->vf_info[i].filter, next) {
+   struct bnxt_filter_info *tmp;
+
+   STAILQ_FOREACH_SAFE(filter, &bp->pf->vf_info[i].filter, next, 
tmp) {
rte_free(filter);
STAILQ_REMOVE(&bp->pf->vf_info[i].filter, filter,
  bnxt_filter_info, next);
-- 
2.45.2



[PATCH v2 1/5] bus/fslmc: fix use after rte_free

2024-12-19 Thread Stephen Hemminger
The cleanup loop would deference the dpio_dev after freeing.
Use TAILQ_FOREACH_SAFE to fix that.
Found by building with sanitizer undefined flag.

Fixes: e55d0494ab98 ("bus/fslmc: support secondary process")
Cc: shreyansh.j...@nxp.com
Cc: sta...@dpdk.org
Signed-off-by: Stephen Hemminger 
---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c 
b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 2dfcf7a498..fb7d1968d1 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -403,6 +403,7 @@ dpaa2_create_dpio_device(int vdev_fd,
struct rte_dpaa2_device *obj)
 {
struct dpaa2_dpio_dev *dpio_dev = NULL;
+   struct dpaa2_dpio_dev *dpio_tmp;
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info)};
struct qbman_swp_desc p_des;
struct dpio_attr attr;
@@ -588,7 +589,7 @@ dpaa2_create_dpio_device(int vdev_fd,
rte_free(dpio_dev);
 
/* For each element in the list, cleanup */
-   TAILQ_FOREACH(dpio_dev, &dpio_dev_list, next) {
+   RTE_TAILQ_FOREACH_SAFE(dpio_dev, &dpio_dev_list, next, dpio_tmp) {
if (dpio_dev->dpio) {
dpio_disable(dpio_dev->dpio, CMD_PRI_LOW,
dpio_dev->token);
-- 
2.45.2



[PATCH v2 0/5] Fixes for build with -Dsanitize=undefined

2024-12-19 Thread Stephen Hemminger
Recent bug report https://bugs.dpdk.org/show_bug.cgi?id=1605
highlighted that no build is done with -Dsanitize=address,undefined.
Doing a test build showed some new issues that were not being reported.

Note: a couple of these required introducing the equivalent _FOREACH_SAFE
variant macros. These are in FreeBSD and in DPDK windows versions of queue.h
but never made it to Linux sys/queue.h. Put the macro in the file where
used (rather than a common spot) to make backporting the fixes easier.

Stephen Hemminger (5):
  bus/fslmc: fix use after rte_free
  net/bnxt: fix use after free
  net/bnx2x: use RTE_BIT32
  net/qede: fix use after free
  vhost: use strlcpy instead of strncpy

 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c |   3 +-
 drivers/net/bnx2x/ecore_reg.h| 130 +++
 drivers/net/bnxt/bnxt_filter.c   |  11 +-
 drivers/net/qede/qede_filter.c   |  20 ++--
 lib/vhost/socket.c   |   3 +-
 5 files changed, 90 insertions(+), 77 deletions(-)

-- 
2.45.2



[PATCH v2 3/5] net/bnx2x: use RTE_BIT32

2024-12-19 Thread Stephen Hemminger
The expression "0x1 << 31" is actually undefined since 0x1
is an integer not unsigned. Fix the general problem by using
the existing RTE_BIT32() macros instead.

Shows up as a cryptic warning when building with -Dbsanitize=undefined.

../drivers/net/bnx2x/bnx2x.c:3362:25: error: case label does not reduce to an 
integer constant
 3362 | case 
AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY:
  | ^~~~

Signed-off-by: Stephen Hemminger 
---
 drivers/net/bnx2x/ecore_reg.h | 130 +-
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/drivers/net/bnx2x/ecore_reg.h b/drivers/net/bnx2x/ecore_reg.h
index 6f7b0522f2..bddea3b832 100644
--- a/drivers/net/bnx2x/ecore_reg.h
+++ b/drivers/net/bnx2x/ecore_reg.h
@@ -4359,71 +4359,71 @@
 #define HW_LOCK_RESOURCE_RECOVERY_REG   11
 #define HW_LOCK_RESOURCE_RESET  5
 #define HW_LOCK_RESOURCE_SPIO   2
-#define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT   (0x1 << 4)
-#define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR   (0x1 << 5)
-#define AEU_INPUTS_ATTN_BITS_BRB_HW_INTERRUPT   (0x1 << 19)
-#define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR   (0x1 << 18)
-#define AEU_INPUTS_ATTN_BITS_CCM_HW_INTERRUPT   (0x1 << 31)
-#define AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR   (0x1 << 30)
-#define AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT   (0x1 << 9)
-#define AEU_INPUTS_ATTN_BITS_CDU_PARITY_ERROR   (0x1 << 8)
-#define AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT   (0x1 << 7)
-#define AEU_INPUTS_ATTN_BITS_CFC_PARITY_ERROR   (0x1 << 6)
-#define AEU_INPUTS_ATTN_BITS_CSDM_HW_INTERRUPT  (0x1 << 29)
-#define AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR  (0x1 << 28)
-#define AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT (0x1 
<< 1)
-#define AEU_INPUTS_ATTN_BITS_CSEMI_PARITY_ERROR (0x1 
<< 0)
-#define AEU_INPUTS_ATTN_BITS_DEBUG_PARITY_ERROR (0x1 
<< 18)
-#define AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT  (0x1 << 11)
-#define AEU_INPUTS_ATTN_BITS_DMAE_PARITY_ERROR  (0x1 << 10)
-#define AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT (0x1 << 13)
-#define AEU_INPUTS_ATTN_BITS_DOORBELLQ_PARITY_ERROR (0x1 << 12)
-#define AEU_INPUTS_ATTN_BITS_GPIO0_FUNCTION_0   (0x1 << 2)
-#define AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR   (0x1 << 12)
-#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY (0x1 << 28)
-#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY   (0x1 << 31)
-#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY  (0x1 << 29)
-#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY  (0x1 << 30)
-#define AEU_INPUTS_ATTN_BITS_MISC_HW_INTERRUPT  (0x1 << 15)
-#define AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR  (0x1 << 14)
-#define AEU_INPUTS_ATTN_BITS_NIG_PARITY_ERROR   (0x1 << 14)
-#define AEU_INPUTS_ATTN_BITS_PARSER_PARITY_ERROR(0x1 << 20)
-#define AEU_INPUTS_ATTN_BITS_PBCLIENT_HW_INTERRUPT  (0x1 << 31)
-#define AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR  (0x1 << 30)
-#define AEU_INPUTS_ATTN_BITS_PBF_PARITY_ERROR   (0x1 << 0)
-#define AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT (0x1 
<< 2)
-#define AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR (0x1 
<< 3)
-#define AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_HW_INTERRUPT (0x1 << 5)
-#define AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR (0x1 << 4)
-#define AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT   (0x1 << 3)
-#define AEU_INPUTS_ATTN_BITS_PXP_PARITY_ERROR   (0x1 << 2)
-#define AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT(0x1 << 3)
-#define AEU_INPUTS_ATTN_BITS_QM_PARITY_ERROR(0x1 << 2)
-#define AEU_INPUTS_ATTN_BITS_SEARCHER_PARITY_ERROR  (0x1 << 22)
-#define AEU_INPUTS_ATTN_BITS_SPIO5  (0x1 << 15)
-#define AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT   (0x1 << 27)
-#define AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR   (0x1 << 26)
-#define AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT(0x1 << 5)
-#define AEU_INPUTS_ATTN_BITS_TIMERS_PARITY_ERROR(0x1 << 4)
-#define AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT  (0x1 << 25)
-#define AEU_INPUTS_ATTN_BITS_TSDM_PARITY_ERROR  (0x1 << 24)
-#define AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT (0x1 
<< 29)
-#define AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR (0x1 
<< 28)
-#define AEU_INPUTS_ATTN_BITS_UCM_HW_INTERRUPT   (0x1 << 23)
-#de

[PATCH v2 4/5] net/qede: fix use after free

2024-12-19 Thread Stephen Hemminger
The loop cleaning up flowdir resources was using SLIST_FOREACH
but the inner loop would call rte_free. Found by building with
address sanitizer undefined check.

Also remove needless initialization, and null check.

Fixes: f5765f66f9bb ("net/qede: refactor flow director into generic aRFS")
Cc: shahed.sha...@cavium.com
Cc: sta...@dpdk.org

Signed-off-by: Stephen Hemminger 
---
 drivers/net/qede/qede_filter.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/qede/qede_filter.c b/drivers/net/qede/qede_filter.c
index 14fb4338e9..5db36b03e2 100644
--- a/drivers/net/qede/qede_filter.c
+++ b/drivers/net/qede/qede_filter.c
@@ -10,6 +10,13 @@
 #include 
 #include 
 
+#ifndef SLIST_FOREACH_SAFE
+#defineSLIST_FOREACH_SAFE(var, head, field, tvar)  
\
+   for ((var) = SLIST_FIRST((head));   \
+   (var) && ((tvar) = SLIST_NEXT((var), field), 1);\
+   (var) = (tvar))
+#endif
+
 #include "qede_ethdev.h"
 
 /* VXLAN tunnel classification mapping */
@@ -154,15 +161,12 @@ int qede_check_fdir_support(struct rte_eth_dev *eth_dev)
 void qede_fdir_dealloc_resc(struct rte_eth_dev *eth_dev)
 {
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-   struct qede_arfs_entry *tmp = NULL;
+   struct qede_arfs_entry *tmp, *tmp2;
 
-   SLIST_FOREACH(tmp, &qdev->arfs_info.arfs_list_head, list) {
-   if (tmp) {
-   rte_memzone_free(tmp->mz);
-   SLIST_REMOVE(&qdev->arfs_info.arfs_list_head, tmp,
-qede_arfs_entry, list);
-   rte_free(tmp);
-   }
+   SLIST_FOREACH_SAFE(tmp, &qdev->arfs_info.arfs_list_head, list, tmp2) {
+   rte_memzone_free(tmp->mz);
+   SLIST_REMOVE(&qdev->arfs_info.arfs_list_head, tmp, 
qede_arfs_entry, list);
+   rte_free(tmp);
}
 }
 
-- 
2.45.2



[PATCH v2 5/5] vhost: use strlcpy instead of strncpy

2024-12-19 Thread Stephen Hemminger
Some tools such as gcc address sanitizer will complain if strncpy
is used to completely fill a string since it will not be null
terminated. Since the previous code forced as null at end,
use strlcpy() to get the same effect.

Signed-off-by: Stephen Hemminger 
---
 lib/vhost/socket.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index d29d15494c..f938189cc2 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -359,8 +359,7 @@ create_unix_socket(struct vhost_user_socket *vsocket)
 
memset(un, 0, sizeof(*un));
un->sun_family = AF_UNIX;
-   strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
-   un->sun_path[sizeof(un->sun_path) - 1] = '\0';
+   strlcpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
 
vsocket->socket_fd = fd;
return 0;
-- 
2.45.2



[PATCH 1/1] vhost: fix missing gso_size validity check

2024-12-19 Thread Yunjian Wang
The value of tso_segsz cannot be 0, instead check that value of
gso_size was set.

Fixes: d0cf91303d73 ("vhost: add Tx offload capabilities")
Cc: sta...@dpdk.org

Signed-off-by: Yunjian Wang 
---
 lib/vhost/virtio_net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 69901ab3b5..2ac5bc29a3 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2733,6 +2733,9 @@ vhost_dequeue_offload_legacy(struct virtio_net *dev, 
struct virtio_net_hdr *hdr,
}
 
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+   if (hdr->gso_size == 0)
+   goto error;
+
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_TCPV6:
-- 
2.33.0



[PATCH v2 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread Yunjian Wang
The hdr->csum_start does two successive reads from user space to read a
variable length data structure. The result overflow if the data structure
changes between the two reads.

To fix this, we can prevent double fetch issue by copying virtio_hdr to
the temporary variable.

Fixes: 4dc4e33ffa10 ("net/virtio: fix Rx checksum calculation")
Cc: sta...@dpdk.org

Signed-off-by: Yunjian Wang 
---
v2: update code styles suggested by David Marchand
---
 lib/vhost/virtio_net.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 69901ab3b5..2676447906 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2896,8 +2896,8 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
uint32_t hdr_remain = dev->vhost_hlen;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
-   struct virtio_net_hdr tmp_hdr;
-   struct virtio_net_hdr *hdr = NULL;
+   bool has_vnet_hdr = false;
+   struct virtio_net_hdr hdr;
uint16_t vec_idx;
struct vhost_async *async = vq->async;
struct async_inflight_info *pkts_info;
@@ -2913,11 +2913,11 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 * No luck, the virtio-net header doesn't fit
 * in a contiguous virtual area.
 */
-   copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
-   hdr = &tmp_hdr;
+   copy_vnet_hdr_from_desc(&hdr, buf_vec);
} else {
-   hdr = (struct virtio_net_hdr 
*)((uintptr_t)buf_vec[0].buf_addr);
+   hdr = *(struct virtio_net_hdr 
*)((uintptr_t)buf_vec[0].buf_addr);
}
+   has_vnet_hdr = true;
}
 
for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
@@ -2953,7 +2953,7 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
if (async_fill_seg(dev, vq, cur, mbuf_offset,
   buf_iova + buf_offset, cpy_len, 
false) < 0)
goto error;
-   } else if (likely(hdr && cur == m)) {
+   } else if (likely(has_vnet_hdr && cur == m)) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
mbuf_offset),
(void *)((uintptr_t)(buf_addr + buf_offset)),
cpy_len);
@@ -3013,10 +3013,10 @@ desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
if (is_async) {
async_iter_finalize(async);
-   if (hdr)
-   pkts_info[slot_idx].nethdr = *hdr;
-   } else if (hdr) {
-   vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags);
+   if (has_vnet_hdr)
+   pkts_info[slot_idx].nethdr = hdr;
+   } else if (has_vnet_hdr) {
+   vhost_dequeue_offload(dev, &hdr, m, legacy_ol_flags);
}
 
return 0;
@@ -3363,7 +3363,6 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
 {
uint16_t avail_idx = vq->last_avail_idx;
uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-   struct virtio_net_hdr *hdr;
uintptr_t desc_addrs[PACKED_BATCH_SIZE];
uint16_t ids[PACKED_BATCH_SIZE];
uint16_t i;
@@ -3381,9 +3380,11 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
   pkts[i]->pkt_len);
 
if (virtio_net_with_host_offload(dev)) {
+   struct virtio_net_hdr hdr;
+
vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
-   hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
-   vhost_dequeue_offload(dev, hdr, pkts[i], 
legacy_ol_flags);
+   hdr = *(struct virtio_net_hdr *)(desc_addrs[i]);
+   vhost_dequeue_offload(dev, &hdr, pkts[i], 
legacy_ol_flags);
}
}
 
-- 
2.33.0



Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread Stephen Hemminger
On Fri, 20 Dec 2024 02:17:12 +
"Wangyunjian(wangyunjian,TongTu)"  wrote:

> > -Original Message-
> > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > Sent: Friday, December 20, 2024 12:16 AM
> > To: Wangyunjian(wangyunjian,TongTu) 
> > Cc: dev@dpdk.org; maxime.coque...@redhat.com; chen...@nvidia.com;
> > Lilijun (Jerry) ; xiawei (H) 
> > ;
> > wangzengyuan ; sta...@dpdk.org
> > Subject: Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading
> > 
> > On Thu, 19 Dec 2024 14:38:28 +0800
> > Yunjian Wang  wrote:
> >   
> > > - hdr = (struct virtio_net_hdr 
> > > *)((uintptr_t)buf_vec[0].buf_addr);
> > > + rte_memcpy((void *)(uintptr_t)&tmp_hdr,
> > > + (void *)(uintptr_t)buf_vec[0].buf_addr,
> > > + sizeof(struct virtio_net_hdr));
> > >   }  
> > 
> > Do not introduce more rte_memcpy of a fixed size.
> > You don't need that many casts!
> > Why can you not use a structure assignment here.  
> 
>  The virtio_hdr is a shared component, and other fields within it are read
> multiple times. This can potentially result in a double fetch scenario.


The point is do a copy, but not with rte_memcpy.
Also you need a rte_compiler_barrier() anyway.


[PATCH] net/bonding: check return value when enable/disable promisc mode

2024-12-19 Thread Sunyang Wu
Add validation for the return value of rte_eth_promiscuous_enable
and rte_eth_promiscuous_disable.

Signed-off-by: Sunyang Wu 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 91bf2c2345..f69496feec 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -2783,6 +2783,7 @@ bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
 {
struct bond_dev_private *internals = dev->data->dev_private;
uint16_t port_id = internals->current_primary_port;
+   int ret;
 
switch (internals->mode) {
case BONDING_MODE_ROUND_ROBIN:
@@ -2802,10 +2803,19 @@ bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
 * mode should be set to new primary member according to bonding
 * device.
 */
-   if (rte_eth_promiscuous_get(internals->port_id) == 1)
-   rte_eth_promiscuous_enable(port_id);
-   else
-   rte_eth_promiscuous_disable(port_id);
+   if (rte_eth_promiscuous_get(internals->port_id) == 1) {
+   ret = rte_eth_promiscuous_enable(port_id);
+   if (ret != 0)
+   RTE_BOND_LOG(ERR,
+"Failed to enable promiscuous mode 
for port %u: %s",
+port_id, rte_strerror(-ret));
+   } else {
+   ret = rte_eth_promiscuous_disable(port_id);
+   if (ret != 0)
+   RTE_BOND_LOG(ERR,
+"Failed to disable promiscuous 
mode for port %u: %s",
+port_id, rte_strerror(-ret));
+   }
}
 
return 0;
-- 
2.19.0.rc0.windows.1



RE: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading

2024-12-19 Thread Wangyunjian(wangyunjian,TongTu)
> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Friday, December 20, 2024 12:16 AM
> To: Wangyunjian(wangyunjian,TongTu) 
> Cc: dev@dpdk.org; maxime.coque...@redhat.com; chen...@nvidia.com;
> Lilijun (Jerry) ; xiawei (H) ;
> wangzengyuan ; sta...@dpdk.org
> Subject: Re: [PATCH 1/1] vhost: fix a double fetch when dequeue offloading
> 
> On Thu, 19 Dec 2024 14:38:28 +0800
> Yunjian Wang  wrote:
> 
> > -   hdr = (struct virtio_net_hdr 
> > *)((uintptr_t)buf_vec[0].buf_addr);
> > +   rte_memcpy((void *)(uintptr_t)&tmp_hdr,
> > +   (void *)(uintptr_t)buf_vec[0].buf_addr,
> > +   sizeof(struct virtio_net_hdr));
> > }
> 
> Do not introduce more rte_memcpy of a fixed size.
> You don't need that many casts!
> Why can you not use a structure assignment here.

 The virtio_hdr is a shared component, and other fields within it are read
multiple times. This can potentially result in a double fetch scenario.


[PATCH] net/virtio: check return value when calling pthread_mutex_init

2024-12-19 Thread Sunyang Wu
Add validation for the return value of the pthread_mutex_init.

Signed-off-by: Sunyang Wu 
---
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c 
b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 2997d2bd26..00272cf802 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -735,8 +735,13 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char 
*path, uint16_t queues,
 enum virtio_user_backend_type backend_type)
 {
uint64_t backend_features;
+   int ret;
 
-   pthread_mutex_init(&dev->mutex, NULL);
+   ret = pthread_mutex_init(&dev->mutex, NULL);
+   if (ret) {
+   PMD_INIT_LOG(ERR, "(%s) init dev mutex failed", dev->path);
+   return ret;
+   }
strlcpy(dev->path, path, PATH_MAX);
 
dev->started = 0;
-- 
2.19.0.rc0.windows.1



Re: [PATCH v4] net/zxdh: Provided zxdh basic init

2024-12-19 Thread Junlong Wang
In the latest v4 version I submitted on December 18th,
when I opened the -Wanalyzer-out-of-bounds and compiled it on the gcc14.2 
environment,
''
C compiler for the host machine: cc (gcc 14.2.1 "cc (GCC) 14.2.1 20241104 (Red 
Hat 14.2.1-6)")
Compiler for C supports arguments -Wanalyzer-out-of-bounds: YES
''
this issue did not occur;


And [PATCH v4] net/zxdh: Provided zxdh basic init, this patch was submitted 
three months ago on September 9th;
So, I am confused;
Is this issue also present in the latest v4 submission version and do we need 
to solved it



>Overall this looks good, one test checklist item for me was to build
>with Gcc 14 and analyzer option. This finds bugs but can generate false
>positives.  The output is quite verbose.

>It complains about this which may or may not be a real problem.
>If memcpy() is used instead of rte_memcpy() then the problem goes away.
>The issue is that inlined version rte_memcpy() will reference past the 
>arguments
>as an internal optimization for small values.

>[1564/3222] Compiling C object 
>drivers/libtmp_rte_net_zxdh.a.p/net_zxdh_zxdh_common.c.o
>In file included from ../lib/mempool/rte_mempool.h:50,
> from ../lib/mbuf/rte_mbuf.h:38,
> from ../lib/net/rte_ether.h:20,
> from ../lib/ethdev/rte_eth_ctrl.h:10,
> from ../lib/ethdev/rte_ethdev.h:1472,
> from ../lib/ethdev/ethdev_driver.h:21,
> from ../drivers/net/zxdh/zxdh_common.c:8:
>In function ‘rte_mov15_or_less’,
>inlined from ‘rte_memcpy_generic’ at 
> ../lib/eal/x86/include/rte_memcpy.h:395:10,
>inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:757:10,
>inlined from ‘zxdh_get_res_info’ at 
> ../drivers/net/zxdh/zxdh_common.c:231:2:
>.../lib/eal/x86/include/rte_memcpy.h:82:55: warning: stack-based buffer 
>overflow [CWE-121] [-Wanalyzer-out-of-bounds]
>   82 | ((struct rte_uint64_alias *)dst)->val =
>  | ~~^
>   83 | ((const struct rte_uint64_alias *)src)->val;
>  | ~~~
>  ‘zxdh_panelid_get’: events 1-3
>|
>|../drivers/net/zxdh/zxdh_common.c:250:1:
>|  239 | uint8_t reps = 0;
>|  | 
>|  | |
>|  | (2) capacity: 1 byte
>|..
>|  250 | zxdh_panelid_get(struct rte_eth_dev *dev, uint8_t *panelid)
>|  | ^~~~
>|  | |
>|  | (1) entry to ‘zxdh_panelid_get’
>|..
>|  255 | int32_t ret = zxdh_get_res_panel_id(¶m, panelid);
>|  |   ~
>|  |   |
>|  |   (3) inlined call to ‘zxdh_get_res_panel_id’ 
> from ‘zxdh_panelid_get’
>|
>   |
>   |  242 | if (zxdh_get_res_info(in, ZXDH_TBL_FIELD_PNLID, 
> &reps, &reps_len) != ZXDH_BAR_MSG_OK)
>   |  | 
> ^
>   |  | |
>   |  | (4) calling ‘zxdh_get_res_info’ from 
> ‘zxdh_panelid_get’
>   |
> ‘zxdh_get_res_info’: events 5-12
>   |
>   |  186 | zxdh_get_res_info(struct zxdh_res_para *dev, uint8_t 
> field, uint8_t *res, uint16_t *len)
>   |  | ^
>   |  | |
>   |  | (5) entry to ‘zxdh_get_res_info’
>   |..
>   |  192 | if (!res || !dev)
>   |  |~
>   |  ||
>   |  |(6) following ‘false’ branch
>   |..
>   |  195 | struct zxdh_tbl_msg_header tbl_msg = {
>   |  |~~~
>   |  ||
>   |  |(7) ...to here
>   |..
>   |  217 | if (ret != ZXDH_BAR_MSG_OK) {
>   |  |~
>   |  ||
>   |  |(8) following ‘false’ branch (when ‘ret == 
> 0’)...
>   |..
>   |  225 | if (tbl_reps->check != ZXDH_TBL_MSG_PRO_SUCCESS) {
>   |  |
>   |  |||
>   |  ||(9) ...to here
>   |  |(10) following ‘false’ branch...
>   |..
>   |  230 | *len = tbl_reps->len;
>   |  |~
>   |  ||
>   |  |(11) ...to here
>   |  231 | rte_memcpy(res, (recv_buf + 
> ZXDH_REPS_HEADER_OFFSET +
>   |  | ~
>   |  | |
>   |  | (12) inlined cal

[PATCH v6] graph: mcore: optimize graph search

2024-12-19 Thread Huichao Cai
Hi David, does "has_data_member_inserted_between = 
{offset_of(total_sched_fail), offset_of(xstat_off)}" need to be on the same 
line?



Re: [PATCH] net/virtio: check return value when calling pthread_mutex_init

2024-12-19 Thread Stephen Hemminger
On Fri, 20 Dec 2024 09:53:26 +0800
Sunyang Wu  wrote:

> Add validation for the return value of the pthread_mutex_init.
> 
> Signed-off-by: Sunyang Wu 
> ---
>  drivers/net/virtio/virtio_user/virtio_user_dev.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c 
> b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index 2997d2bd26..00272cf802 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -735,8 +735,13 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char 
> *path, uint16_t queues,
>enum virtio_user_backend_type backend_type)
>  {
>   uint64_t backend_features;
> + int ret;
>  
> - pthread_mutex_init(&dev->mutex, NULL);
> + ret = pthread_mutex_init(&dev->mutex, NULL);
> + if (ret) {
> + PMD_INIT_LOG(ERR, "(%s) init dev mutex failed", dev->path);
> + return ret;
> + }
>   strlcpy(dev->path, path, PATH_MAX);
>  
>   dev->started = 0;

This is wasted and dead code.
The man page for pthread_mutex_init explicitly says:

RETURN VALUE
   pthread_mutex_init  always  returns  0.