[RFC 1/3] eal: add enhanced lock annotations

2024-12-02 Thread David Marchand
Clang 3.6 and later offers enhanced lock annotations that distinguish
shared from exclusive capability/lock release.
Introduce macros for those new function attributes.
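
For example (usage taken from the later patches in this series), a lock
type and its lock/unlock helpers get annotated as:

  typedef struct __rte_capability("spinlock") {
          volatile RTE_ATOMIC(int) locked;
  } rte_spinlock_t;

  static inline void
  rte_spinlock_lock(rte_spinlock_t *sl)
          __rte_acquire_capability(sl);

  static inline void
  rte_spinlock_unlock(rte_spinlock_t *sl)
          __rte_release_capability(sl);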

Signed-off-by: David Marchand 
---
 doc/api/doxy-api.conf.in   | 12 +++
 lib/eal/include/rte_lock_annotations.h | 48 ++
 2 files changed, 60 insertions(+)

diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index d23352d300..ac490e7631 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -103,6 +103,18 @@ PREDEFINED  = __DOXYGEN__ \
   __rte_shared_trylock_function(x)= \
   __rte_assert_shared_lock(x)= \
   __rte_unlock_function(x)= \
+  __rte_capability(x)= \
+  __rte_requires_capability(x)= \
+  __rte_acquire_capability(x)= \
+  __rte_try_acquire_capability(x)= \
+  __rte_release_capability(x)= \
+  __rte_assert_capability(x)= \
+  __rte_requires_shared_capability(x)= \
+  __rte_acquire_shared_capability(x)= \
+  __rte_try_acquire_shared_capability(x)= \
+  __rte_release_shared_capability(x)= \
+  __rte_assert_shared_capability(x)= \
+  __rte_exclude_capability(x)= \
   __attribute__(x)=
 
 OPTIMIZE_OUTPUT_FOR_C   = YES
diff --git a/lib/eal/include/rte_lock_annotations.h b/lib/eal/include/rte_lock_annotations.h
index 2456a69352..4240458c53 100644
--- a/lib/eal/include/rte_lock_annotations.h
+++ b/lib/eal/include/rte_lock_annotations.h
@@ -43,6 +43,36 @@ extern "C" {
 #define __rte_locks_excluded(...) \
__attribute__((locks_excluded(__VA_ARGS__)))
 
+
+#define __rte_capability(...) \
+   __attribute__((capability(__VA_ARGS__)))
+
+#define __rte_requires_capability(...) \
+   __attribute__((requires_capability(__VA_ARGS__)))
+#define __rte_acquire_capability(...) \
+   __attribute__((acquire_capability(__VA_ARGS__)))
+#define __rte_try_acquire_capability(ret, ...) \
+   __attribute__((try_acquire_capability(ret, __VA_ARGS__)))
+#define __rte_release_capability(...) \
+   __attribute__((release_capability(__VA_ARGS__)))
+#define __rte_assert_capability(...) \
+   __attribute__((assert_capability(__VA_ARGS__)))
+
+#define __rte_requires_shared_capability(...) \
+   __attribute__((requires_shared_capability(__VA_ARGS__)))
+#define __rte_acquire_shared_capability(...) \
+   __attribute__((acquire_shared_capability(__VA_ARGS__)))
+#define __rte_try_acquire_shared_capability(ret, ...) \
+   __attribute__((try_acquire_shared_capability(ret, __VA_ARGS__)))
+#define __rte_release_shared_capability(...) \
+   __attribute__((release_shared_capability(__VA_ARGS__)))
+#define __rte_assert_shared_capability(...) \
+   __attribute__((assert_shared_capability(__VA_ARGS__)))
+
+#define __rte_exclude_capability(...) \
+   __attribute__((exclude_capability(__VA_ARGS__)))
+
+
 #define __rte_no_thread_safety_analysis \
__attribute__((no_thread_safety_analysis))
 
@@ -67,6 +97,24 @@ extern "C" {
 
 #define __rte_locks_excluded(...)
 
+
+#define __rte_capability(...)
+
+#define __rte_requires_capability(...)
+#define __rte_acquire_capability(...)
+#define __rte_try_acquire_capability(...)
+#define __rte_release_capability(...)
+#define __rte_assert_capability(...)
+
+#define __rte_requires_shared_capability(...)
+#define __rte_acquire_shared_capability(...)
+#define __rte_try_acquire_shared_capability(...)
+#define __rte_release_shared_capability(...)
+#define __rte_assert_shared_capability(...)
+
+#define __rte_exclude_capability(...)
+
+
 #define __rte_no_thread_safety_analysis
 
 #endif /* RTE_ANNOTATE_LOCKS */
-- 
2.47.0



Re: [PATCH v1 11/21] net/_common_intel: add post-Tx buffer free function

2024-12-02 Thread David Marchand
On Mon, Dec 2, 2024 at 12:27 PM Bruce Richardson
 wrote:
>
> The actions taken for post-Tx buffer free for the SSE and AVX code paths
> of the i40e, iavf and ice drivers are all common, so centralize those in
> the common/intel_eth driver.
>
> Signed-off-by: Bruce Richardson 
> ---
>  drivers/net/_common_intel/tx.h  | 71 
>  drivers/net/i40e/i40e_rxtx_vec_common.h | 72 -
>  drivers/net/iavf/iavf_rxtx_vec_common.h | 61 -
>  drivers/net/ice/ice_rxtx_vec_common.h   | 61 -
>  4 files changed, 98 insertions(+), 167 deletions(-)
>
> diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
> index c372d2838b..a930309c05 100644
> --- a/drivers/net/_common_intel/tx.h
> +++ b/drivers/net/_common_intel/tx.h
> @@ -7,6 +7,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  /* forward declaration of the common intel (ci) queue structure */
>  struct ci_tx_queue;
> @@ -107,4 +108,74 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
> rte_mbuf **tx_pkts, uint16_
> txep[i].mbuf = tx_pkts[i];
>  }
>
> +#define IETH_VPMD_TX_MAX_FREE_BUF 64
> +
> +typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
> +
> +static __rte_always_inline int
> +ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
> +{
> +   struct ci_tx_entry *txep;
> +   uint32_t n;
> +   uint32_t i;
> +   int nb_free = 0;
> +   struct rte_mbuf *m, *free[IETH_VPMD_TX_MAX_FREE_BUF];
> +
> +   /* check DD bits on threshold descriptor */
> +   if (!desc_done(txq, txq->tx_next_dd))
> +   return 0;
> +
> +   n = txq->tx_rs_thresh;
> +
> +/* first buffer to free from S/W ring is at index
> + * tx_next_dd - (tx_rs_thresh-1)
> + */
> +   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> +
> +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> +   for (i = 0; i < n; i++) {
> +   free[i] = txep[i].mbuf;
> +   /* no need to reset txep[i].mbuf in vector path */
> +   }
> +   rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
> +   goto done;
> +   }
> +
> +   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
> +   if (likely(m != NULL)) {
> +   free[0] = m;
> +   nb_free = 1;
> +   for (i = 1; i < n; i++) {
> +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +   if (likely(m != NULL)) {
> +   if (likely(m->pool == free[0]->pool)) {
> +   free[nb_free++] = m;
> +   } else {
> +   rte_mempool_put_bulk(free[0]->pool,
> +(void *)free,
> +nb_free);
> +   free[0] = m;
> +   nb_free = 1;
> +   }
> +   }
> +   }
> +   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
> +   } else {
> +   for (i = 1; i < n; i++) {
> +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +   if (m != NULL)
> +   rte_mempool_put(m->pool, m);
> +   }
> +   }

Is it possible to take an extra step and convert to rte_pktmbuf_free_bulk?
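
Something like the following (untested sketch on my side, relying on
rte_pktmbuf_free_bulk() doing the per-pool grouping internally) is what
I have in mind:

   for (i = 0; i < n; i++)
           free[i] = txep[i].mbuf;
   rte_pktmbuf_free_bulk(free, n);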


> +
> +done:
> +   /* buffers were freed, update counters */
> +   txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> +   txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> +   if (txq->tx_next_dd >= txq->nb_tx_desc)
> +   txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> +
> +   return txq->tx_rs_thresh;
> +}
> +


-- 
David Marchand



[RFC 3/3] eal: enhance lock annotations for rwlock

2024-12-02 Thread David Marchand
Convert rwlock to the clang capability annotations.

Signed-off-by: David Marchand 
---
 lib/eal/common/eal_memalloc.h|   2 +-
 lib/eal/common/eal_private.h |   2 +-
 lib/eal/include/generic/rte_rwlock.h |  23 ++--
 lib/eal/include/rte_eal_memconfig.h  |  24 ++--
 lib/ethdev/ethdev_driver.c   |   4 +-
 lib/ethdev/ethdev_private.h  |   4 +-
 lib/ethdev/rte_ethdev.c  |   4 +-
 lib/hash/rte_cuckoo_hash.c   |   8 +-
 lib/vhost/iotlb.h|   8 +-
 lib/vhost/vhost.c|  10 +-
 lib/vhost/vhost.h|  24 ++--
 lib/vhost/vhost_crypto.c |  14 +--
 lib/vhost/virtio_net.c   | 170 +--
 lib/vhost/virtio_net_ctrl.c  |   2 +-
 14 files changed, 150 insertions(+), 149 deletions(-)

diff --git a/lib/eal/common/eal_memalloc.h b/lib/eal/common/eal_memalloc.h
index 286ffb7633..0c267066d9 100644
--- a/lib/eal/common/eal_memalloc.h
+++ b/lib/eal/common/eal_memalloc.h
@@ -92,7 +92,7 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
 
 int
 eal_memalloc_init(void)
-   __rte_shared_locks_required(rte_mcfg_mem_get_lock());
+   __rte_requires_shared_capability(rte_mcfg_mem_get_lock());
 
 int
 eal_memalloc_cleanup(void);
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index bb315dab04..89bc05ecc5 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -124,7 +124,7 @@ int rte_eal_memseg_init(void);
  *   0 on success, negative on error
  */
 int rte_eal_memory_init(void)
-   __rte_shared_locks_required(rte_mcfg_mem_get_lock());
+   __rte_requires_shared_capability(rte_mcfg_mem_get_lock());
 
 /**
  * Configure timers
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index ac0474466a..197b245d11 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -23,6 +23,7 @@
  */
 
 #include 
+#include 
 
 #include 
 #include 
@@ -57,7 +58,7 @@ extern "C" {
/* Writer is waiting or has lock */
 #define RTE_RWLOCK_READ 0x4/* Reader increment */
 
-typedef struct __rte_lockable {
+typedef struct __rte_capability("rwlock") {
RTE_ATOMIC(int32_t) cnt;
 } rte_rwlock_t;
 
@@ -90,7 +91,7 @@ rte_rwlock_init(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_read_lock(rte_rwlock_t *rwl)
-   __rte_shared_lock_function(rwl)
+   __rte_acquire_shared_capability(rwl)
__rte_no_thread_safety_analysis
 {
int32_t x;
@@ -127,7 +128,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl)
  */
 static inline int
 rte_rwlock_read_trylock(rte_rwlock_t *rwl)
-   __rte_shared_trylock_function(0, rwl)
+   __rte_try_acquire_shared_capability(false, rwl)
__rte_no_thread_safety_analysis
 {
int32_t x;
@@ -160,7 +161,7 @@ rte_rwlock_read_trylock(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_read_unlock(rte_rwlock_t *rwl)
-   __rte_unlock_function(rwl)
+   __rte_release_shared_capability(rwl)
__rte_no_thread_safety_analysis
 {
rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_READ, rte_memory_order_release);
@@ -178,7 +179,7 @@ rte_rwlock_read_unlock(rte_rwlock_t *rwl)
  */
 static inline int
 rte_rwlock_write_trylock(rte_rwlock_t *rwl)
-   __rte_exclusive_trylock_function(0, rwl)
+   __rte_try_acquire_capability(false, rwl)
__rte_no_thread_safety_analysis
 {
int32_t x;
@@ -200,7 +201,7 @@ rte_rwlock_write_trylock(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_write_lock(rte_rwlock_t *rwl)
-   __rte_exclusive_lock_function(rwl)
+   __rte_acquire_capability(rwl)
__rte_no_thread_safety_analysis
 {
int32_t x;
@@ -238,7 +239,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_write_unlock(rte_rwlock_t *rwl)
-   __rte_unlock_function(rwl)
+   __rte_release_capability(rwl)
__rte_no_thread_safety_analysis
 {
rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_WRITE, rte_memory_order_release);
@@ -276,7 +277,7 @@ rte_rwlock_write_is_locked(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
-   __rte_shared_lock_function(rwl);
+   __rte_acquire_shared_capability(rwl);
 
 /**
 * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back
@@ -286,7 +287,7 @@ rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
-   __rte_unlock_function(rwl);
+   __rte_release_shared_capability(rwl);
 
 /**
  * Try to execute critical section in a hardware memory transaction, if it
@@ -303,7 +304,7 @@ rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
  */
 static inline void
 rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
-   __rte_exclusive_lock_function(rwl);
+   __rte_acquire_capability(r

Re: [PATCH v1 09/21] net/ixgbe: use common Tx queue structure

2024-12-02 Thread Bruce Richardson
On Mon, Dec 02, 2024 at 01:51:35PM +, Medvedkin, Vladimir wrote:
>Hi Bruce,
> 
>On 02/12/2024 11:24, Bruce Richardson wrote:
> 
> Merge in additional fields used by the ixgbe driver and then convert it
> over to using the common Tx queue structure.
> 
> Signed-off-by: Bruce Richardson [1]
> ---
>  drivers/net/_common_intel/tx.h| 14 +++-
>  drivers/net/ixgbe/ixgbe_ethdev.c  |  4 +-
>  .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
>  drivers/net/ixgbe/ixgbe_rxtx.c| 64 +--
>  drivers/net/ixgbe/ixgbe_rxtx.h| 56 ++--
>  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 26 
>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   | 14 ++--
>  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c| 14 ++--
>  8 files changed, 80 insertions(+), 114 deletions(-)
> 
> diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
> index c4a1a0c816..51ae3b051d 100644
> --- a/drivers/net/_common_intel/tx.h
> +++ b/drivers/net/_common_intel/tx.h
> @@ -34,9 +34,13 @@ struct ci_tx_queue {
> volatile struct i40e_tx_desc *i40e_tx_ring;
> volatile struct iavf_tx_desc *iavf_tx_ring;
> volatile struct ice_tx_desc *ice_tx_ring;
> +   volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
> };
> volatile uint8_t *qtx_tail;   /* register address of tail */
> -   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
> +   union {
> +   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
> +   struct ci_tx_entry_vec *sw_ring_vec;
> +   };
> rte_iova_t tx_ring_dma;/* TX ring DMA address */
> uint16_t nb_tx_desc;   /* number of TX descriptors */
> uint16_t tx_tail; /* current value of tail register */
> @@ -87,6 +91,14 @@ struct ci_tx_queue {
> uint8_t tc;
> bool use_ctx;  /* with ctx info, each pkt needs two descriptors */
> };
> +   struct { /* ixgbe specific values */
> +   const struct ixgbe_txq_ops *ops;
> +   struct ixgbe_advctx_info *ctx_cache;
> 
>'struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];' takes only 80
>bytes of memory, so using a pointer saves 72 bytes. Since the final
>version of the 'struct ci_tx_queue' without driver specific fields
>takes 96 bytes, embedding 'ixgbe_advctx_info ctx_cache[2]' array will
>take one more cache line, which is not a huge deal in my opinion.
> 

Maybe not, though another way to look at it is that those two
context entries are nearly as big as the rest of the struct!

>Or consider another (possibly better) approach, where for non IXGBE
>'struct ci_tx_queue' will remain the same size, but only for IXGBE an
>extra 80 bytes will be allocated:
> 
>struct { /* ixgbe specific values */
>        const struct ixgbe_txq_ops *ops;
>        uint32_t ctx_curr;
>        uint8_t pthresh;   /**< Prefetch threshold register. */
>        uint8_t hthresh;   /**< Host threshold register. */
>        uint8_t wthresh;   /**< Write-back threshold reg. */
>        uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
>        struct ixgbe_advctx_info ctx_cache[0];
>};
> 
> +   uint32_t ctx_curr;
> +#ifdef RTE_LIB_SECURITY
> +   uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
> +#endif
> +   };
> };
>  };
> 

I prefer solutions where the extra 80 bytes are only allocated for the one
driver that needs them. I'll see if this alternative can work ok for us.

/Bruce



Re: [PATCH v1 11/21] net/_common_intel: add post-Tx buffer free function

2024-12-02 Thread Bruce Richardson
On Mon, Dec 02, 2024 at 01:59:37PM +0100, David Marchand wrote:
> On Mon, Dec 2, 2024 at 12:27 PM Bruce Richardson
>  wrote:
> >
> > The actions taken for post-Tx buffer free for the SSE and AVX code paths
> > of the i40e, iavf and ice drivers are all common, so centralize those in
> > the common/intel_eth driver.
> >
> > Signed-off-by: Bruce Richardson 
> > ---
> >  drivers/net/_common_intel/tx.h  | 71 
> >  drivers/net/i40e/i40e_rxtx_vec_common.h | 72 -
> >  drivers/net/iavf/iavf_rxtx_vec_common.h | 61 -
> >  drivers/net/ice/ice_rxtx_vec_common.h   | 61 -
> >  4 files changed, 98 insertions(+), 167 deletions(-)
> >
> > diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
> > index c372d2838b..a930309c05 100644
> > --- a/drivers/net/_common_intel/tx.h
> > +++ b/drivers/net/_common_intel/tx.h
> > @@ -7,6 +7,7 @@
> >
> >  #include 
> >  #include 
> > +#include 
> >
> >  /* forward declaration of the common intel (ci) queue structure */
> >  struct ci_tx_queue;
> > @@ -107,4 +108,74 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
> > rte_mbuf **tx_pkts, uint16_
> > txep[i].mbuf = tx_pkts[i];
> >  }
> >
> > +#define IETH_VPMD_TX_MAX_FREE_BUF 64
> > +
> > +typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
> > +
> > +static __rte_always_inline int
> > +ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
> > +{
> > +   struct ci_tx_entry *txep;
> > +   uint32_t n;
> > +   uint32_t i;
> > +   int nb_free = 0;
> > +   struct rte_mbuf *m, *free[IETH_VPMD_TX_MAX_FREE_BUF];
> > +
> > +   /* check DD bits on threshold descriptor */
> > +   if (!desc_done(txq, txq->tx_next_dd))
> > +   return 0;
> > +
> > +   n = txq->tx_rs_thresh;
> > +
> > +/* first buffer to free from S/W ring is at index
> > + * tx_next_dd - (tx_rs_thresh-1)
> > + */
> > +   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > +
> > +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > +   for (i = 0; i < n; i++) {
> > +   free[i] = txep[i].mbuf;
> > +   /* no need to reset txep[i].mbuf in vector path */
> > +   }
> > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
> > +   goto done;
> > +   }
> > +
> > +   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
> > +   if (likely(m != NULL)) {
> > +   free[0] = m;
> > +   nb_free = 1;
> > +   for (i = 1; i < n; i++) {
> > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +   if (likely(m != NULL)) {
> > +   if (likely(m->pool == free[0]->pool)) {
> > +   free[nb_free++] = m;
> > +   } else {
> > +   rte_mempool_put_bulk(free[0]->pool,
> > +(void *)free,
> > +nb_free);
> > +   free[0] = m;
> > +   nb_free = 1;
> > +   }
> > +   }
> > +   }
> > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
> > +   } else {
> > +   for (i = 1; i < n; i++) {
> > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +   if (m != NULL)
> > +   rte_mempool_put(m->pool, m);
> > +   }
> > +   }
> 
> Is it possible to take an extra step and convert to rte_pktmbuf_free_bulk?
> 
Will investigate


Re: [PATCH v1 09/21] net/ixgbe: use common Tx queue structure

2024-12-02 Thread Medvedkin, Vladimir

Hi Bruce,

On 02/12/2024 11:24, Bruce Richardson wrote:

Merge in additional fields used by the ixgbe driver and then convert it
over to using the common Tx queue structure.

Signed-off-by: Bruce Richardson
---
  drivers/net/_common_intel/tx.h| 14 +++-
  drivers/net/ixgbe/ixgbe_ethdev.c  |  4 +-
  .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
  drivers/net/ixgbe/ixgbe_rxtx.c| 64 +--
  drivers/net/ixgbe/ixgbe_rxtx.h| 56 ++--
  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 26 
  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   | 14 ++--
  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c| 14 ++--
  8 files changed, 80 insertions(+), 114 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index c4a1a0c816..51ae3b051d 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -34,9 +34,13 @@ struct ci_tx_queue {
volatile struct i40e_tx_desc *i40e_tx_ring;
volatile struct iavf_tx_desc *iavf_tx_ring;
volatile struct ice_tx_desc *ice_tx_ring;
+   volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
};
volatile uint8_t *qtx_tail;   /* register address of tail */
-   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
+   union {
+   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
+   struct ci_tx_entry_vec *sw_ring_vec;
+   };
rte_iova_t tx_ring_dma;/* TX ring DMA address */
uint16_t nb_tx_desc;   /* number of TX descriptors */
uint16_t tx_tail; /* current value of tail register */
@@ -87,6 +91,14 @@ struct ci_tx_queue {
uint8_t tc;
bool use_ctx;  /* with ctx info, each pkt needs two descriptors */
};
+   struct { /* ixgbe specific values */
+   const struct ixgbe_txq_ops *ops;
+   struct ixgbe_advctx_info *ctx_cache;
'struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];' takes only 80 bytes 
of memory, so using a pointer saves 72 bytes. Since the final version of 
the 'struct ci_tx_queue' without driver specific fields takes 96 bytes, 
embedding 'ixgbe_advctx_info ctx_cache[2]' array will take one more 
cache line, which is not a huge deal in my opinion.


Or consider another (possibly better) approach, where for non IXGBE 
'struct ci_tx_queue' will remain the same size, but only for IXGBE an 
extra 80 bytes will be allocated:


struct { /* ixgbe specific values */
        const struct ixgbe_txq_ops *ops;
        uint32_t ctx_curr;
        uint8_t pthresh;   /**< Prefetch threshold register. */
        uint8_t hthresh;   /**< Host threshold register. */
        uint8_t wthresh;   /**< Write-back threshold reg. */
        uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
        struct ixgbe_advctx_info ctx_cache[0];
};


+   uint32_t ctx_curr;
+#ifdef RTE_LIB_SECURITY
+   uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
+#endif
+   };
};
  };
  



--
Regards,
Vladimir


[RFC 2/3] eal: enhance lock annotations for spinlock and seqlock

2024-12-02 Thread David Marchand
Convert spinlock (and as a consequence seqlock) to the clang
capability annotations.

Signed-off-by: David Marchand 
---
 drivers/bus/dpaa/base/qbman/qman.c |  4 ++--
 drivers/net/fm10k/fm10k_ethdev.c   |  4 ++--
 lib/eal/include/generic/rte_spinlock.h | 14 +++---
 lib/eal/include/rte_eal_memconfig.h|  4 ++--
 lib/eal/include/rte_seqlock.h  |  4 ++--
 lib/graph/graph_private.h  |  4 ++--
 lib/vhost/vdpa.c   |  2 +-
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/bus/dpaa/base/qbman/qman.c b/drivers/bus/dpaa/base/qbman/qman.c
index f92b25343a..11fabcaff5 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -45,7 +45,7 @@ static inline int fq_isset(struct qman_fq *fq, u32 mask)
 }
 
 static inline void fq_lock(struct qman_fq *fq)
-   __rte_exclusive_lock_function(&fq->fqlock)
+   __rte_acquire_capability(&fq->fqlock)
__rte_no_thread_safety_analysis
 {
if (fq_isset(fq, QMAN_FQ_FLAG_LOCKED))
@@ -53,7 +53,7 @@ static inline void fq_lock(struct qman_fq *fq)
 }
 
 static inline void fq_unlock(struct qman_fq *fq)
-__rte_unlock_function(&fq->fqlock)
+   __rte_release_capability(&fq->fqlock)
__rte_no_thread_safety_analysis
 {
if (fq_isset(fq, QMAN_FQ_FLAG_LOCKED))
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 7b490bea17..747042d621 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -116,7 +116,7 @@ fm10k_mbx_initlock(struct fm10k_hw *hw)
 
 static void
 fm10k_mbx_lock(struct fm10k_hw *hw)
-   __rte_exclusive_lock_function(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back))
+   __rte_acquire_capability(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back))
 {
while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
rte_delay_us(FM10K_MBXLOCK_DELAY_US);
@@ -124,7 +124,7 @@ fm10k_mbx_lock(struct fm10k_hw *hw)
 
 static void
 fm10k_mbx_unlock(struct fm10k_hw *hw)
-   __rte_unlock_function(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back))
+   __rte_release_capability(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back))
 {
rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
 }
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index c2980601b2..c907d4e45c 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -32,7 +32,7 @@ extern "C" {
 /**
  * The rte_spinlock_t type.
  */
-typedef struct __rte_lockable {
+typedef struct __rte_capability("spinlock") {
volatile RTE_ATOMIC(int) locked; /**< lock status 0 = unlocked, 1 = 
locked */
 } rte_spinlock_t;
 
@@ -61,7 +61,7 @@ rte_spinlock_init(rte_spinlock_t *sl)
  */
 static inline void
 rte_spinlock_lock(rte_spinlock_t *sl)
-   __rte_exclusive_lock_function(sl);
+   __rte_acquire_capability(sl);
 
 #ifdef RTE_FORCE_INTRINSICS
 static inline void
@@ -87,7 +87,7 @@ rte_spinlock_lock(rte_spinlock_t *sl)
  */
 static inline void
 rte_spinlock_unlock(rte_spinlock_t *sl)
-   __rte_unlock_function(sl);
+   __rte_release_capability(sl);
 
 #ifdef RTE_FORCE_INTRINSICS
 static inline void
@@ -109,7 +109,7 @@ rte_spinlock_unlock(rte_spinlock_t *sl)
 __rte_warn_unused_result
 static inline int
 rte_spinlock_trylock(rte_spinlock_t *sl)
-   __rte_exclusive_trylock_function(1, sl);
+   __rte_try_acquire_capability(true, sl);
 
 #ifdef RTE_FORCE_INTRINSICS
 static inline int
@@ -158,7 +158,7 @@ static inline int rte_tm_supported(void);
  */
 static inline void
 rte_spinlock_lock_tm(rte_spinlock_t *sl)
-   __rte_exclusive_lock_function(sl);
+   __rte_acquire_capability(sl);
 
 /**
  * Commit hardware memory transaction or release the spinlock if
@@ -169,7 +169,7 @@ rte_spinlock_lock_tm(rte_spinlock_t *sl)
  */
 static inline void
 rte_spinlock_unlock_tm(rte_spinlock_t *sl)
-   __rte_unlock_function(sl);
+   __rte_release_capability(sl);
 
 /**
  * Try to execute critical section in a hardware memory transaction,
@@ -190,7 +190,7 @@ rte_spinlock_unlock_tm(rte_spinlock_t *sl)
 __rte_warn_unused_result
 static inline int
 rte_spinlock_trylock_tm(rte_spinlock_t *sl)
-   __rte_exclusive_trylock_function(1, sl);
+   __rte_try_acquire_capability(true, sl);
 
 /**
  * The rte_spinlock_recursive_t type.
diff --git a/lib/eal/include/rte_eal_memconfig.h b/lib/eal/include/rte_eal_memconfig.h
index 0b1d0d4ff0..55d78de334 100644
--- a/lib/eal/include/rte_eal_memconfig.h
+++ b/lib/eal/include/rte_eal_memconfig.h
@@ -132,14 +132,14 @@ rte_mcfg_mempool_write_unlock(void)
  */
 void
 rte_mcfg_timer_lock(void)
-   __rte_exclusive_lock_function(rte_mcfg_timer_get_lock());
+   __rte_acquire_capability(rte_mcfg_timer_get_lock());
 
 /**
  * Unlock the internal EAL Timer Library lock for exclusive access.
  */
 void
 rte_mcfg_timer_unlock(void)
-   __rte_unlock_function(rte_mcfg_timer_get_lo

[RFC 0/3] Improve lock annotations

2024-12-02 Thread David Marchand
A recent bug (see 22aa9a9c7099 ("vhost: fix deadlock in Rx async path"))
made more visible a gap in the clang thread safety annotations that
DPDK uses: no distinction is made between releasing a read lock and
releasing a write lock.

Clang 3.6 and later offers improved thread safety checks.

Marking objects as "lockable" has evolved into flagging them as a named
"capability". clang then reports the capability name when an error is
raised, making the report a bit easier to understand.

For example, a spinlock is now flagged as:
typedef struct __rte_capability("spinlock") {
  volatile RTE_ATOMIC(int) locked;
} rte_spinlock_t;
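
With this, a diagnostic can name the capability, giving output along
the lines of (illustrative):

  warning: releasing spinlock 'lock' that was not held [-Wthread-safety-analysis]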


For "exclusive" locking (spinlocks / write locks), the conversion is:
- exclusive_lock_function -> acquire_capability
- exclusive_trylock_function -> try_acquire_capability
- unlock_function -> release_capability
...

For "shared" locking (read locks):
- shared_lock_function -> acquire_shared_capability
- shared_trylock_function -> try_acquire_shared_capability
- unlock_function -> release_shared_capability
...
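
For example, the shared unlock of rwlock becomes distinguishable from
the exclusive one (excerpt from the rwlock patch of this series):

   static inline void
   rte_rwlock_read_unlock(rte_rwlock_t *rwl)
  -       __rte_unlock_function(rwl)
  +       __rte_release_shared_capability(rwl)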


This RFC proposes to use those annotations (sticking to the
convention of simply prefixing the compiler attributes with __rte_).
The existing "old" annotations macros are left in place in case users
started to rely on them.

Note: DPDK requirements state that clang version must be >= 3.6
(following use of C11 standard).
No check for the availability of the new attributes has been added in
this series; let's see what the CI thinks of this assumption...

Comments welcome.


-- 
David Marchand

David Marchand (3):
  eal: add enhanced lock annotations
  eal: enhance lock annotations for spinlock and seqlock
  eal: enhance lock annotations for rwlock

 doc/api/doxy-api.conf.in   |  12 ++
 drivers/bus/dpaa/base/qbman/qman.c |   4 +-
 drivers/net/fm10k/fm10k_ethdev.c   |   4 +-
 lib/eal/common/eal_memalloc.h  |   2 +-
 lib/eal/common/eal_private.h   |   2 +-
 lib/eal/include/generic/rte_rwlock.h   |  23 ++--
 lib/eal/include/generic/rte_spinlock.h |  14 +-
 lib/eal/include/rte_eal_memconfig.h|  28 ++--
 lib/eal/include/rte_lock_annotations.h |  48 +++
 lib/eal/include/rte_seqlock.h  |   4 +-
 lib/ethdev/ethdev_driver.c |   4 +-
 lib/ethdev/ethdev_private.h|   4 +-
 lib/ethdev/rte_ethdev.c|   4 +-
 lib/graph/graph_private.h  |   4 +-
 lib/hash/rte_cuckoo_hash.c |   8 +-
 lib/vhost/iotlb.h  |   8 +-
 lib/vhost/vdpa.c   |   2 +-
 lib/vhost/vhost.c  |  10 +-
 lib/vhost/vhost.h  |  24 ++--
 lib/vhost/vhost_crypto.c   |  14 +-
 lib/vhost/virtio_net.c | 170 -
 lib/vhost/virtio_net_ctrl.c|   2 +-
 22 files changed, 228 insertions(+), 167 deletions(-)

-- 
2.47.0



[PATCH v2] common/cnxk: fix atomic load in batch ops

2024-12-02 Thread Nawal Kishor
In the roc batch alloc wait code, the __ATOMIC_RELAXED load is changed
to __ATOMIC_ACQUIRE in order to avoid potential out-of-order loads.
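
The pattern being fixed, sketched with a hypothetical use of the rest
of the cache line: the acquire load of the status word orders it before
any dependent reads of the data it guards, e.g.:

   /* the status code in word 0 guards the words that follow it */
   if (((__atomic_load_n(cache_line, __ATOMIC_ACQUIRE) >> 5) & 0x3) !=
       ALLOC_CCODE_INVAL)
           buf = (void *)cache_line[1]; /* cannot be read ahead of the status */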

Fixes: 50d08d3934ec ("common/cnxk: fix batch alloc completion poll logic")

Signed-off-by: Nawal Kishor 
---
v2: Fixed review comments

 drivers/common/cnxk/roc_npa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index f7cb4460e7..8525038810 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -255,7 +255,7 @@ roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
/* Batch alloc status code is updated in bits [5:6] of the first word
 * of the 128 byte cache line.
 */
-   while (((__atomic_load_n(cache_line, __ATOMIC_RELAXED) >> 5) & 0x3) ==
+   while (((__atomic_load_n(cache_line, __ATOMIC_ACQUIRE) >> 5) & 0x3) ==
   ALLOC_CCODE_INVAL)
if (wait_us && (plt_tsc_cycles() - start) >= ticks)
break;
-- 
2.34.1



Re: [PATCH v4] net/hns3: fix Rx packet without CRC data

2024-12-02 Thread Stephen Hemminger
On Wed, 27 Nov 2024 18:08:07 +0800
Jie Hai  wrote:

> From: Dengdui Huang 
> 
> When KEEP_CRC offload is enabled, the CRC data is still stripped
> in the following cases:
> 1. For HIP08 network engine, the packet type is TCP and the length
>is less than or equal to 60B.
> 2. For HIP09 network engine, the packet type is IP and the length
>is less than or equal to 60B.
> 
> So the driver has to recalculate the packet CRC for these rare scenarios.
> 
> In addition, to avoid impacting performance, KEEP_CRC is not
> supported when NEON or SVE algorithm is used.
> 
> Fixes: 8973d7c4ca12 ("net/hns3: support keeping CRC")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Dengdui Huang 
> Acked-by: Huisong Li 
> Acked-by: Jie Hai 
> ---

There is another issue around CRC in this driver.
If KEEP_CRC is enabled, the packet is received into a multi-segment mbuf,
and the CRC bytes are the only data left in the last segment,
then the driver will free that segment and adjust the lengths.
That would make it impossible for an application that was looking
for the CRC to find it.

See:

static inline void
recalculate_data_len(struct rte_mbuf *first_seg, struct rte_mbuf *last_seg,
		     struct rte_mbuf *rxm, struct hns3_rx_queue *rxq,
		     uint16_t data_len)
{
	uint8_t crc_len = rxq->crc_len;

	if (data_len <= crc_len) {
		/* the last segment only holds (part of) the CRC: free it
		 * and trim the remaining CRC bytes off the previous segment
		 */
		rte_pktmbuf_free_seg(rxm);
		first_seg->nb_segs--;
		last_seg->data_len = (uint16_t)(last_seg->data_len -
						(crc_len - data_len));
		last_seg->next = NULL;
	} else
		/* segment holds payload plus CRC: strip the CRC bytes */
		rxm->data_len = (uint16_t)(data_len - crc_len);
}
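
For example, an application relying on KEEP_CRC might read the kept FCS
as the last 4 bytes of the packet (sketch):

	uint32_t crc_buf;
	const uint32_t *crc = rte_pktmbuf_read(mbuf,
			rte_pktmbuf_pkt_len(mbuf) - 4, 4, &crc_buf);

which cannot work if those bytes were already trimmed away.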


[PATCH] net/ixgbe: fix missing E610 support in flow engine

2024-12-02 Thread Vladimir Medvedkin
Currently the flow engine is missing the new mac type E610 in the check
for fdir support.
This patch adds this mac type to the check.

Fixes: 316637762a5f ("net/ixgbe/base: enable E610 device")
Cc: sta...@dpdk.org

Signed-off-by: Vladimir Medvedkin 
---
 drivers/net/ixgbe/ixgbe_fdir.c | 3 ++-
 drivers/net/ixgbe/ixgbe_flow.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_fdir.c b/drivers/net/ixgbe/ixgbe_fdir.c
index 06d6e2126d..b6351bc2cf 100644
--- a/drivers/net/ixgbe/ixgbe_fdir.c
+++ b/drivers/net/ixgbe/ixgbe_fdir.c
@@ -647,7 +647,8 @@ ixgbe_fdir_configure(struct rte_eth_dev *dev)
hw->mac.type != ixgbe_mac_X540 &&
hw->mac.type != ixgbe_mac_X550 &&
hw->mac.type != ixgbe_mac_X550EM_x &&
-   hw->mac.type != ixgbe_mac_X550EM_a)
+   hw->mac.type != ixgbe_mac_X550EM_a &&
+   hw->mac.type != ixgbe_mac_E610)
return -ENOSYS;
 
/* x550 supports mac-vlan and tunnel mode but other NICs not */
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index 1b35ed5faa..33da2f47ec 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -2764,7 +2764,8 @@ ixgbe_parse_fdir_filter(struct rte_eth_dev *dev,
hw->mac.type != ixgbe_mac_X540 &&
hw->mac.type != ixgbe_mac_X550 &&
hw->mac.type != ixgbe_mac_X550EM_x &&
-   hw->mac.type != ixgbe_mac_X550EM_a)
+   hw->mac.type != ixgbe_mac_X550EM_a &&
+   hw->mac.type != ixgbe_mac_E610)
return -ENOTSUP;
 
ret = ixgbe_parse_fdir_filter_normal(dev, attr, pattern,
-- 
2.43.0



lib/eal/arm/include/rte_vect.h fails to compile with clang14 for 32bit ARM

2024-12-02 Thread Roger Melton (rmelton)
Hey folks,

We are building DPDK with clang14 for a 32bit armv8-a based CPU and ran into a 
compile error with the following from lib/eal/arm/include/rte_vect.h:


#if (defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
    (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7))
/* NEON intrinsic vcopyq_laneq_u32() is not supported in ARMv7-A(AArch32)
 * On AArch64, this intrinsic is supported since GCC version 7.
 */
static inline uint32x4_t
vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
                 uint32x4_t b, const int lane_b)
{
return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
}
#endif

clang14 compile fails as follows:

In file included from 
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/arm/include/rte_vect.h:80:24:
 error: argument to '__builtin_neon_vgetq_lane_i32' must be a constant integer
return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
^ ~~
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:7697:22:
 note: expanded from macro 'vgetq_lane_u32'
__ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \
^ 
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:24148:19:
 note: expanded from macro 'vsetq_lane_u32'
uint32_t __s0 = __p0; \
^~~~
In file included from 
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/arm/include/rte_vect.h:80:9:
 error: argument to '__builtin_neon_vsetq_lane_i32' must be a constant integer
return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
^ ~~
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:24150:24:
 note: expanded from macro 'vsetq_lane_u32'
__ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, 
__p2); \
^ 
2 errors generated.


clang14 does appear to support the vcopyq_laneq_u32() intrinsic, so we want to
skip the conditional implementation.

Two approaches I have tested to resolve the error are:

1) skip if building with clang:

#if !defined(__clang__) && ((defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
    (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7)))

2) skip if not building for ARMv7:

#if (defined(RTE_ARCH_ARMv7) && defined(RTE_ARCH_32)) || \
(defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7))

Both address our immediate problem, but may not be appropriate for all cases.

Can anyone suggest the proper way to address this? I'll be submitting a patch
once I have a solution that is acceptable to the community.

Regards,
Roger







Re: [PATCH v1 09/21] net/ixgbe: use common Tx queue structure

2024-12-02 Thread Bruce Richardson
On Mon, Dec 02, 2024 at 02:09:35PM +, Bruce Richardson wrote:
> On Mon, Dec 02, 2024 at 01:51:35PM +, Medvedkin, Vladimir wrote:
> >Hi Bruce,
> > 
> >On 02/12/2024 11:24, Bruce Richardson wrote:
> > 
> > Merge in additional fields used by the ixgbe driver and then convert it
> > over to using the common Tx queue structure.
> > 
> > Signed-off-by: Bruce Richardson [1]
> > ---
> >  drivers/net/_common_intel/tx.h| 14 +++-
> >  drivers/net/ixgbe/ixgbe_ethdev.c  |  4 +-
> >  .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
> >  drivers/net/ixgbe/ixgbe_rxtx.c| 64 +--
> >  drivers/net/ixgbe/ixgbe_rxtx.h| 56 ++--
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 26 
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   | 14 ++--
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c| 14 ++--
> >  8 files changed, 80 insertions(+), 114 deletions(-)
> > 
> > diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
> > index c4a1a0c816..51ae3b051d 100644
> > --- a/drivers/net/_common_intel/tx.h
> > +++ b/drivers/net/_common_intel/tx.h
> > @@ -34,9 +34,13 @@ struct ci_tx_queue {
> > volatile struct i40e_tx_desc *i40e_tx_ring;
> > volatile struct iavf_tx_desc *iavf_tx_ring;
> > volatile struct ice_tx_desc *ice_tx_ring;
> > +   volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
> > };
> > volatile uint8_t *qtx_tail;   /* register address of tail */
> > -   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
> > +   union {
> > -   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
> > +   struct ci_tx_entry_vec *sw_ring_vec;
> > +   };
> > rte_iova_t tx_ring_dma;/* TX ring DMA address */
> > uint16_t nb_tx_desc;   /* number of TX descriptors */
> > uint16_t tx_tail; /* current value of tail register */
> > @@ -87,6 +91,14 @@ struct ci_tx_queue {
> > uint8_t tc;
> > bool use_ctx;  /* with ctx info, each pkt needs two descriptors */
> > };
> > +   struct { /* ixgbe specific values */
> > +   const struct ixgbe_txq_ops *ops;
> > +   struct ixgbe_advctx_info *ctx_cache;
> > 
> >'struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];' takes only 80
> >bytes of memory, so using a pointer saves 72 bytes. Since the final
> >version of the 'struct ci_tx_queue' without driver specific fields
> >takes 96 bytes, embedding 'ixgbe_advctx_info ctx_cache[2]' array will
>take one more cache line, which is not a huge deal in my opinion.
> > 
> 
> Maybe not, though another way to look at it is that those two
> context entries are nearly as big as the rest of the struct!
> 
> >Or consider another (possibly better) approach, where for non IXGBE
> >'struct ci_tx_queue' will remain the same size, but only for IXGBE an
> >extra 80 bytes will be alllocated:
> > 
> >struct { /* ixgbe specific values */
> >        const struct ixgbe_txq_ops *ops;
> >        uint32_t ctx_curr;
> >        uint8_t pthresh;   /**< Prefetch threshold register. */
> >        uint8_t hthresh;   /**< Host threshold register. */
> >        uint8_t wthresh;   /**< Write-back threshold reg. */
> >        uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
> >        struct ixgbe_advctx_info ctx_cache[0];
> >};
> > 
> > +   uint32_t ctx_curr;
> > +#ifdef RTE_LIB_SECURITY
> > +   uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
> > +#endif
> > +   };
> > };
> >  };
> > 
> 
> I prefer solutions where the extra 80 bytes are only allocated for the one
> driver that needs them. I'll see if this alternative can work ok for us.
> 
Trying out this solution, I hit the problem described in the commit log of
the previous patch to this one - it introduces a dependency on ixgbe
structures inside the common driver. By changing the type of the ctx field
from an array to a pointer, we remove the need to have the actual type
defined at compile time - as long as we never dereference the pointer. This
non-dereferencing is why, for example, we can have the union of all the
different descriptor types in the structure without having to include the
headers that define them.
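
A minimal illustration of the constraint:

   struct ixgbe_advctx_info;	/* forward declaration only */

   struct ci_tx_queue {
   	struct ixgbe_advctx_info *ctx_cache;   /* ok: pointer to incomplete type */
   	struct ixgbe_advctx_info ctx_array[2]; /* error: array needs the full definition */
   };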

If we include ixgbe_advctx_info as an array rather than via a pointer - even
as a zero-length array - then we need to have the definition of the structure
present at that point in the code. This means we either need t

[PATCH v4 1/1] usertools/devbind: allow changing UID/GID for VFIO

2024-12-02 Thread Anatoly Burakov
Currently, when binding a device to VFIO, the UID/GID of the device always
stay at the system default (`root`). Yet, when running DPDK as a non-root
user, one has to change the UID/GID of the device to match the user's
UID/GID before the device can be used.

This patch adds an option to `dpdk-devbind.py` to change the UID/GID of
the device when binding it to VFIO.

Signed-off-by: Anatoly Burakov 
---

Notes:
v3 -> v4:
- Added documentation

v2 -> v3:
- Replaced error printout back to hard exit
- Reworked UID/GID validation to be at command line parsing
- Simplified chown code

v1 -> v2:
- Replaced hard exit with an error printout

 doc/guides/tools/devbind.rst |  6 ++
 usertools/dpdk-devbind.py| 41 +---
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/doc/guides/tools/devbind.rst b/doc/guides/tools/devbind.rst
index df4f3505ac..841615570f 100644
--- a/doc/guides/tools/devbind.rst
+++ b/doc/guides/tools/devbind.rst
@@ -56,6 +56,12 @@ OPTIONS
 WARNING: This can lead to loss of network connection and should be used
 with caution.
 
+* ``--uid uid, --gid gid``
+
+  By default, devices which are bound to VFIO will be owned by ``root``.
+  Use this flag to change ownership to the specified user and group, so that
+  devices bound to VFIO would be usable by unprivileged users.
+
 
 .. warning::
 
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index f2a2a9a12f..ed1ef0cabc 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -3,11 +3,13 @@
 # Copyright(c) 2010-2014 Intel Corporation
 #
 
-import sys
-import os
-import subprocess
 import argparse
+import grp
+import os
 import platform
+import pwd
+import subprocess
+import sys
 
 from glob import glob
 from os.path import exists, basename
@@ -108,6 +110,8 @@
 status_flag = False
 force_flag = False
 noiommu_flag = False
+vfio_uid = -1
+vfio_gid = -1
 args = []
 
 
@@ -544,6 +548,19 @@ def bind_all(dev_list, driver, force=False):
 
 for d in dev_list:
 bind_one(d, driver, force)
+# if we're binding to vfio-pci, set the IOMMU user/group ownership if one was specified
+if driver == "vfio-pci" and (vfio_uid != -1 or vfio_gid != -1):
+# find IOMMU group for a particular PCI device
+iommu_grp_base_path = os.path.join("/sys/bus/pci/devices", d, "iommu_group")
+# extract the IOMMU group number
+iommu_grp = os.path.basename(os.readlink(iommu_grp_base_path))
+# find VFIO device corresponding to this IOMMU group
+dev_path = os.path.join("/dev/vfio", iommu_grp)
+# set ownership
+try:
+os.chown(dev_path, vfio_uid, vfio_gid)
+except OSError as err:
+sys.exit(f"Error: failed to set IOMMU group ownership for {d}: {err}")
 
 # For kernels < 3.15 when binding devices to a generic driver
 # (i.e. one that doesn't have a PCI ID table) using new_id, some devices
@@ -697,6 +714,8 @@ def parse_args():
 global force_flag
 global noiommu_flag
 global args
+global vfio_uid
+global vfio_gid
 
 parser = argparse.ArgumentParser(
 description='Utility to bind and unbind devices from Linux kernel',
@@ -746,6 +765,20 @@ def parse_args():
 '--noiommu-mode',
 action='store_true',
 help="If IOMMU is not available, enable no IOMMU mode for VFIO 
drivers")
+parser.add_argument(
+"-U",
+"--uid",
+help="For VFIO, specify the UID to set IOMMU group ownership",
+type=lambda u: pwd.getpwnam(u).pw_uid,
+default=-1,
+)
+parser.add_argument(
+"-G",
+"--gid",
+help="For VFIO, specify the GID to set IOMMU group ownership",
+type=lambda g: grp.getgrnam(g).gr_gid,
+default=-1,
+)
 parser.add_argument(
 '--force',
 action='store_true',
@@ -778,6 +811,8 @@ def parse_args():
 b_flag = opt.bind
 elif opt.unbind:
 b_flag = "none"
+vfio_uid = opt.uid
+vfio_gid = opt.gid
 args = opt.devices
 
 if not b_flag and not status_flag:
-- 
2.43.5



[PATCH v1 00/21] Reduce code duplication across Intel NIC drivers

2024-12-02 Thread Bruce Richardson
This RFC attempts to reduce the amount of code duplication across a
number of Intel NIC drivers, specifically: ixgbe, i40e, iavf, and ice.

The first patch extracts a function from the Rx side, otherwise the
majority of the changes are on the Tx side, leading to a converged Tx
queue structure across the 4 drivers, and a large number of common
functions.

RFC->v1:
* Moved the location of the common code from "common/intel_eth" to
  "net/_common_intel", and added only ".." to the driver include path so
  that the paths included "_common_intel" in them, to make it clear it's
  not driver-local headers.
* Due to change in location, structure/fn prefix changes from "ieth" to
  "ci" for "common intel".
* Removed the seeming-arbitrary split of vector and non-vector code -
  since much of the code taken from vector files was scalar code which
  was used by the vector drivers.
* Split code into separate Rx and Tx files.
* Fixed multiple checkpatch issues (but not all).
* Attempted to improve name standardization, by using "_vec" as a common
  suffix for all vector-related fns and data. Previously, some names had
  "vec" in the middle, others had just "_v" suffix or full word "vector"
  as suffix.
* Other minor changes...

Bruce Richardson (21):
  net/_common_intel: add pkt reassembly fn for intel drivers
  net/_common_intel: provide common Tx entry structures
  net/_common_intel: add Tx mbuf ring replenish fn
  drivers/net: align Tx queue struct field names
  drivers/net: add prefix for driver-specific structs
  net/_common_intel: merge ice and i40e Tx queue struct
  net/iavf: use common Tx queue structure
  net/ixgbe: convert Tx queue context cache field to ptr
  net/ixgbe: use common Tx queue structure
  net/_common_intel: pack Tx queue structure
  net/_common_intel: add post-Tx buffer free function
  net/_common_intel: add Tx buffer free fn for AVX-512
  net/iavf: use common Tx free fn for AVX-512
  net/ice: move Tx queue mbuf cleanup fn to common
  net/i40e: use common Tx queue mbuf cleanup fn
  net/ixgbe: use common Tx queue mbuf cleanup fn
  net/iavf: use common Tx queue mbuf cleanup fn
  net/ice: use vector SW ring for all vector paths
  net/i40e: use vector SW ring for all vector paths
  net/iavf: use vector SW ring for all vector paths
  net/ixgbe: use common Tx backlog entry fn

 drivers/net/_common_intel/rx.h|  81 +
 drivers/net/_common_intel/tx.h| 327 ++
 drivers/net/i40e/i40e_ethdev.c|   4 +-
 drivers/net/i40e/i40e_ethdev.h|   8 +-
 drivers/net/i40e/i40e_fdir.c  |  10 +-
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  |   6 +-
 drivers/net/i40e/i40e_rxtx.c  | 193 ---
 drivers/net/i40e/i40e_rxtx.h  |  61 +---
 drivers/net/i40e/i40e_rxtx_vec_altivec.c  |  26 +-
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  26 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c   | 144 +---
 drivers/net/i40e/i40e_rxtx_vec_common.h   | 144 +---
 drivers/net/i40e/i40e_rxtx_vec_neon.c |  26 +-
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  26 +-
 drivers/net/i40e/meson.build  |   2 +-
 drivers/net/iavf/iavf.h   |   2 +-
 drivers/net/iavf/iavf_ethdev.c|   4 +-
 drivers/net/iavf/iavf_rxtx.c  | 180 --
 drivers/net/iavf/iavf_rxtx.h  |  61 +---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c |  47 +--
 drivers/net/iavf/iavf_rxtx_vec_avx512.c   | 214 ++--
 drivers/net/iavf/iavf_rxtx_vec_common.h   | 160 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c  |  57 ++-
 drivers/net/iavf/iavf_vchnl.c |   8 +-
 drivers/net/iavf/meson.build  |   2 +-
 drivers/net/ice/ice_dcf.c |   4 +-
 drivers/net/ice/ice_dcf_ethdev.c  |  21 +-
 drivers/net/ice/ice_diagnose.c|   2 +-
 drivers/net/ice/ice_ethdev.c  |   2 +-
 drivers/net/ice/ice_ethdev.h  |   7 +-
 drivers/net/ice/ice_rxtx.c| 164 -
 drivers/net/ice/ice_rxtx.h|  52 +--
 drivers/net/ice/ice_rxtx_vec_avx2.c   |  26 +-
 drivers/net/ice/ice_rxtx_vec_avx512.c | 153 +---
 drivers/net/ice/ice_rxtx_vec_common.h | 190 +-
 drivers/net/ice/ice_rxtx_vec_sse.c|  30 +-
 drivers/net/ice/meson.build   |   2 +-
 drivers/net/ixgbe/base/ixgbe_osdep.h  |   2 +-
 drivers/net/ixgbe/ixgbe_ethdev.c  |   4 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|   6 +-
 drivers/net/ixgbe/ixgbe_rxtx.c| 139 
 drivers/net/ixgbe/ixgbe_rxtx.h|  73 +---
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 129 +--
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  37 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  37 +-
 drivers/net/ixgbe/meson.build |   2 

[PATCH v1 04/21] drivers/net: align Tx queue struct field names

2024-12-02 Thread Bruce Richardson
Across the various Intel drivers, different names are sometimes given to
fields in the Tx queue structure which have the same function. Do some
renaming to align things better for future merging.
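
The main renames applied here are:

  tx_ring_phys_addr -> tx_ring_dma
  rs_thresh         -> tx_rs_thresh
  free_thresh       -> tx_free_thresh
  nb_used           -> nb_tx_used
  next_dd / next_rs -> tx_next_dd / tx_next_rs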

Signed-off-by: Bruce Richardson 
---
 drivers/net/i40e/i40e_rxtx.c|  6 +--
 drivers/net/i40e/i40e_rxtx.h|  2 +-
 drivers/net/iavf/iavf_rxtx.c| 60 -
 drivers/net/iavf/iavf_rxtx.h| 14 +++---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 19 
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 57 +++
 drivers/net/iavf/iavf_rxtx_vec_common.h | 24 +-
 drivers/net/iavf/iavf_rxtx_vec_sse.c| 18 
 drivers/net/iavf/iavf_vchnl.c   |  2 +-
 drivers/net/ixgbe/base/ixgbe_osdep.h|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx.c  | 16 +++
 drivers/net/ixgbe/ixgbe_rxtx.h  |  6 +--
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c  |  2 +-
 14 files changed, 116 insertions(+), 114 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 2e1f07d2a1..b0bb20fe9a 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2549,7 +2549,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->vsi = vsi;
txq->tx_deferred_start = tx_conf->tx_deferred_start;
 
-   txq->tx_ring_phys_addr = tz->iova;
+   txq->tx_ring_dma = tz->iova;
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
 
/* Allocate software ring */
@@ -2923,7 +2923,7 @@ i40e_tx_queue_init(struct i40e_tx_queue *txq)
/* clear the context structure first */
memset(&tx_ctx, 0, sizeof(tx_ctx));
tx_ctx.new_context = 1;
-   tx_ctx.base = txq->tx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
+   tx_ctx.base = txq->tx_ring_dma / I40E_QUEUE_BASE_ADDR_UNIT;
tx_ctx.qlen = txq->nb_tx_desc;
 
 #ifdef RTE_LIBRTE_IEEE1588
@@ -3209,7 +3209,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
txq->reg_idx = pf->fdir.fdir_vsi->base_queue;
txq->vsi = pf->fdir.fdir_vsi;
 
-   txq->tx_ring_phys_addr = tz->iova;
+   txq->tx_ring_dma = tz->iova;
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
 
/*
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 0f5d3cb0b7..f420c98687 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -129,7 +129,7 @@ struct i40e_rx_queue {
  */
 struct i40e_tx_queue {
uint16_t nb_tx_desc; /**< number of TX descriptors */
-   uint64_t tx_ring_phys_addr; /**< TX ring DMA address */
+   rte_iova_t tx_ring_dma; /**< TX ring DMA address */
volatile struct i40e_tx_desc *tx_ring; /**< TX ring virtual address */
struct ci_tx_entry *sw_ring; /**< virtual address of SW ring */
uint16_t tx_tail; /**< current value of tail register */
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index e337f20073..adaaeb4625 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -216,8 +216,8 @@ static inline bool
 check_tx_vec_allow(struct iavf_tx_queue *txq)
 {
if (!(txq->offloads & IAVF_TX_NO_VECTOR_FLAGS) &&
-   txq->rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
-   txq->rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
+   txq->tx_rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
+   txq->tx_rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
PMD_INIT_LOG(DEBUG, "Vector tx can be enabled on this txq.");
return true;
}
@@ -309,13 +309,13 @@ reset_tx_queue(struct iavf_tx_queue *txq)
}
 
txq->tx_tail = 0;
-   txq->nb_used = 0;
+   txq->nb_tx_used = 0;
 
txq->last_desc_cleaned = txq->nb_tx_desc - 1;
-   txq->nb_free = txq->nb_tx_desc - 1;
+   txq->nb_tx_free = txq->nb_tx_desc - 1;
 
-   txq->next_dd = txq->rs_thresh - 1;
-   txq->next_rs = txq->rs_thresh - 1;
+   txq->tx_next_dd = txq->tx_rs_thresh - 1;
+   txq->tx_next_rs = txq->tx_rs_thresh - 1;
 }
 
 static int
@@ -845,8 +845,8 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
 
txq->nb_tx_desc = nb_desc;
-   txq->rs_thresh = tx_rs_thresh;
-   txq->free_thresh = tx_free_thresh;
+   txq->tx_rs_thresh = tx_rs_thresh;
+   txq->tx_free_thresh = tx_free_thresh;
txq->queue_id = queue_idx;
txq->port_id = dev->data->port_id;
txq->offloads = offloads;
@@ -881,7 +881,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
rte_free(txq);
return -ENOMEM;
}
-   txq->tx_ring_phys_addr = mz->iova;
+   txq->tx_ring_dma = mz->iova;
txq->tx_ring = (struct iavf_tx_desc *)mz->addr;
 
txq->mz = mz;
@@ -2387,7 +2387,7 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq)
 
volatile struct iavf_tx_desc *txd = txq->tx_ring;
 
-   desc_to_clean_to = (uint16_t)(last_desc_clea

[PATCH v1 05/21] drivers/net: add prefix for driver-specific structs

2024-12-02 Thread Bruce Richardson
In preparation for merging the Tx structs for multiple drivers into a
single struct, rename the driver-specific pointers in each struct to
have a driver-specific prefix, to avoid conflicts.
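
For example, the Tx ring pointer gets a driver prefix: txq->tx_ring
becomes txq->i40e_tx_ring in the i40e driver, and similarly for the
other drivers touched below.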

Signed-off-by: Bruce Richardson 
---
 drivers/net/i40e/i40e_fdir.c  |  6 +--
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c  | 30 ++--
 drivers/net/i40e/i40e_rxtx.h  |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_altivec.c  |  6 +--
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  6 +--
 drivers/net/i40e/i40e_rxtx_vec_avx512.c   |  8 ++--
 drivers/net/i40e/i40e_rxtx_vec_common.h   |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_neon.c |  6 +--
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  6 +--
 drivers/net/iavf/iavf_rxtx.c  | 24 +-
 drivers/net/iavf/iavf_rxtx.h  |  4 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c |  6 +--
 drivers/net/iavf/iavf_rxtx_vec_avx512.c   | 14 +++---
 drivers/net/iavf/iavf_rxtx_vec_common.h   |  2 +-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  |  6 +--
 drivers/net/ice/ice_dcf_ethdev.c  |  4 +-
 drivers/net/ice/ice_rxtx.c| 48 +--
 drivers/net/ice/ice_rxtx.h|  4 +-
 drivers/net/ice/ice_rxtx_vec_avx2.c   |  6 +--
 drivers/net/ice/ice_rxtx_vec_avx512.c |  8 ++--
 drivers/net/ice/ice_rxtx_vec_common.h |  4 +-
 drivers/net/ice/ice_rxtx_vec_sse.c|  6 +--
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx.c| 22 -
 drivers/net/ixgbe/ixgbe_rxtx.h|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  6 +--
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  6 +--
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  6 +--
 29 files changed, 128 insertions(+), 128 deletions(-)

diff --git a/drivers/net/i40e/i40e_fdir.c b/drivers/net/i40e/i40e_fdir.c
index 47f79ecf11..c600167634 100644
--- a/drivers/net/i40e/i40e_fdir.c
+++ b/drivers/net/i40e/i40e_fdir.c
@@ -1383,7 +1383,7 @@ i40e_find_available_buffer(struct rte_eth_dev *dev)
volatile struct i40e_tx_desc *tmp_txdp;
 
tmp_tail = txq->tx_tail;
-   tmp_txdp = &txq->tx_ring[tmp_tail + 1];
+   tmp_txdp = &txq->i40e_tx_ring[tmp_tail + 1];
 
do {
if ((tmp_txdp->cmd_type_offset_bsz &
@@ -1640,7 +1640,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 
PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
fdirdp = (volatile struct i40e_filter_program_desc *)
-   (&txq->tx_ring[txq->tx_tail]);
+   (&txq->i40e_tx_ring[txq->tx_tail]);
 
fdirdp->qindex_flex_ptype_vsi =
rte_cpu_to_le_32((fdir_action->rx_queue <<
@@ -1710,7 +1710,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
fdirdp->fd_id = rte_cpu_to_le_32(filter->soft_id);
 
PMD_DRV_LOG(INFO, "filling transmit descriptor.");
-   txdp = &txq->tx_ring[txq->tx_tail + 1];
+   txdp = &txq->i40e_tx_ring[txq->tx_tail + 1];
txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail >> 1]);
 
td_cmd = I40E_TX_DESC_CMD_EOP |
diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
index 260d238ce4..8679e5c1fd 100644
--- a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -75,7 +75,7 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
return 0;
 
/* check DD bits on threshold descriptor */
-   if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+   if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
return 0;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index b0bb20fe9a..34ef931859 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -379,7 +379,7 @@ static inline int
 i40e_xmit_cleanup(struct i40e_tx_queue *txq)
 {
struct ci_tx_entry *sw_ring = txq->sw_ring;
-   volatile struct i40e_tx_desc *txd = txq->tx_ring;
+   volatile struct i40e_tx_desc *txd = txq->i40e_tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
uint16_t desc_to_clean_to;
@@ -1103,7 +1103,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
txq = tx_queue;
sw_ring = txq->sw_ring;
-   txr = txq->tx_ring;
+   txr = txq->i40e_tx_ring;
tx_id = txq->tx_tail;
txe = &sw_ring[tx_id];
 
@@ -1338,7 +1338,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)

[PATCH v1 06/21] net/_common_intel: merge ice and i40e Tx queue struct

2024-12-02 Thread Bruce Richardson
The queue structures of the i40e and ice drivers are virtually identical, so
merge them into a common struct. This should allow easier merging of
functions in future patches using that common struct.
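
For illustration, each driver keeps working with its own descriptor type
through the matching union member; a minimal sketch (the helper name is
hypothetical, the types and fields are from the struct below):

    static inline volatile struct i40e_tx_desc *
    i40e_tx_desc_at(struct ci_tx_queue *txq, uint16_t idx)
    {
            /* i40e code selects the i40e-specific ring member */
            return &txq->i40e_tx_ring[idx];
    }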

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h| 55 +
 drivers/net/i40e/i40e_ethdev.c|  4 +-
 drivers/net/i40e/i40e_ethdev.h|  4 +-
 drivers/net/i40e/i40e_fdir.c  |  4 +-
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c  | 58 +-
 drivers/net/i40e/i40e_rxtx.h  | 50 ++--
 drivers/net/i40e/i40e_rxtx_vec_altivec.c  |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c   |  6 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h   |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_neon.c |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  4 +-
 drivers/net/ice/ice_dcf.c |  4 +-
 drivers/net/ice/ice_dcf_ethdev.c  | 10 ++--
 drivers/net/ice/ice_diagnose.c|  2 +-
 drivers/net/ice/ice_ethdev.c  |  2 +-
 drivers/net/ice/ice_ethdev.h  |  4 +-
 drivers/net/ice/ice_rxtx.c| 60 +--
 drivers/net/ice/ice_rxtx.h| 41 +
 drivers/net/ice/ice_rxtx_vec_avx2.c   |  4 +-
 drivers/net/ice/ice_rxtx_vec_avx512.c |  8 +--
 drivers/net/ice/ice_rxtx_vec_common.h |  8 +--
 drivers/net/ice/ice_rxtx_vec_sse.c|  6 +-
 24 files changed, 165 insertions(+), 185 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 5397007411..c965f5ee6c 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -8,6 +8,9 @@
 #include 
 #include 
 
+/* forward declaration of the common intel (ci) queue structure */
+struct ci_tx_queue;
+
 /**
  * Structure associated with each descriptor of the TX ring of a TX queue.
  */
@@ -24,6 +27,58 @@ struct ci_tx_entry_vec {
struct rte_mbuf *mbuf; /* mbuf associated with TX desc, if any. */
 };
 
+typedef void (*ice_tx_release_mbufs_t)(struct ci_tx_queue *txq);
+
+struct ci_tx_queue {
+   union { /* TX ring virtual address */
+   volatile struct ice_tx_desc *ice_tx_ring;
+   volatile struct i40e_tx_desc *i40e_tx_ring;
+   };
+   volatile uint8_t *qtx_tail;   /* register address of tail */
+   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
+   rte_iova_t tx_ring_dma;/* TX ring DMA address */
+   uint16_t nb_tx_desc;   /* number of TX descriptors */
+   uint16_t tx_tail; /* current value of tail register */
+   uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
+   /* index to last TX descriptor to have been cleaned */
+   uint16_t last_desc_cleaned;
+   /* Total number of TX descriptors ready to be allocated. */
+   uint16_t nb_tx_free;
+   /* Start freeing TX buffers if there are less free descriptors than
+* this value.
+*/
+   uint16_t tx_free_thresh;
+   /* Number of TX descriptors to use before RS bit is set. */
+   uint16_t tx_rs_thresh;
+   uint8_t pthresh;   /**< Prefetch threshold register. */
+   uint8_t hthresh;   /**< Host threshold register. */
+   uint8_t wthresh;   /**< Write-back threshold reg. */
+   uint16_t port_id;  /* Device port identifier. */
+   uint16_t queue_id; /* TX queue index. */
+   uint16_t reg_idx;
+   uint64_t offloads;
+   uint16_t tx_next_dd;
+   uint16_t tx_next_rs;
+   uint64_t mbuf_errors;
+   bool tx_deferred_start; /* don't start this queue in dev start */
+   bool q_set; /* indicate if tx queue has been configured */
+   union {  /* the VSI this queue belongs to */
+   struct ice_vsi *ice_vsi;
+   struct i40e_vsi *i40e_vsi;
+   };
+   const struct rte_memzone *mz;
+
+   union {
+   struct { /* ICE driver specific values */
+   ice_tx_release_mbufs_t tx_rel_mbufs;
+   uint32_t q_teid; /* TX schedule node id. */
+   };
+   struct { /* I40E driver specific values */
+   uint8_t dcb_tc;
+   };
+   };
+};
+
 static __rte_always_inline void
 ci_tx_backlog_entry(struct ci_tx_entry *txep, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
 {
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 30dcdc68a8..bf5560ccc8 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -3685,7 +3685,7 @@ i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
struct i40e_mbuf_stats *mbuf_stats)
 {
uint16_t idx;
-   struct i40e_tx_queue *txq;
+   struct ci_tx_queue *txq;

[PATCH v1 02/21] net/_common_intel: provide common Tx entry structures

2024-12-02 Thread Bruce Richardson
The Tx entry structures, both vector and scalar, are common across Intel
drivers, so provide a single definition to be used everywhere.

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h| 27 +++
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c  | 18 ++---
 drivers/net/i40e/i40e_rxtx.h  | 14 +++---
 drivers/net/i40e/i40e_rxtx_vec_altivec.c  |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c   |  6 ++---
 drivers/net/i40e/i40e_rxtx_vec_common.h   |  4 +--
 drivers/net/i40e/i40e_rxtx_vec_neon.c |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  2 +-
 drivers/net/iavf/iavf_rxtx.c  | 12 -
 drivers/net/iavf/iavf_rxtx.h  | 14 +++---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c |  2 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c   | 10 +++
 drivers/net/iavf/iavf_rxtx_vec_common.h   |  4 +--
 drivers/net/iavf/iavf_rxtx_vec_sse.c  |  2 +-
 drivers/net/ice/ice_dcf_ethdev.c  |  2 +-
 drivers/net/ice/ice_rxtx.c| 16 +--
 drivers/net/ice/ice_rxtx.h| 13 ++---
 drivers/net/ice/ice_rxtx_vec_avx2.c   |  2 +-
 drivers/net/ice/ice_rxtx_vec_avx512.c |  6 ++---
 drivers/net/ice/ice_rxtx_vec_common.h |  6 ++---
 drivers/net/ice/ice_rxtx_vec_sse.c|  2 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx.c| 16 +--
 drivers/net/ixgbe/ixgbe_rxtx.h| 22 +++
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  8 +++---
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  2 +-
 29 files changed, 105 insertions(+), 117 deletions(-)
 create mode 100644 drivers/net/_common_intel/tx.h

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
new file mode 100644
index 00..384352b9db
--- /dev/null
+++ b/drivers/net/_common_intel/tx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_TX_H_
+#define _COMMON_INTEL_TX_H_
+
+#include 
+#include 
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct ci_tx_entry {
+   struct rte_mbuf *mbuf; /* mbuf associated with TX desc, if any. */
+   uint16_t next_id; /* Index of next descriptor in ring. */
+   uint16_t last_id; /* Index of last scattered descriptor. */
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue in 
vector Tx.
+ */
+struct ci_tx_entry_vec {
+   struct rte_mbuf *mbuf; /* mbuf associated with TX desc, if any. */
+};
+
+#endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c 
b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
index 14424c9921..260d238ce4 100644
--- a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -56,7 +56,7 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
struct i40e_tx_queue *txq = tx_queue;
-   struct i40e_tx_entry *txep;
+   struct ci_tx_entry *txep;
struct rte_mbuf **rxep;
int i, n;
uint16_t nb_recycle_mbufs;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 839c8a5442..2e1f07d2a1 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -378,7 +378,7 @@ i40e_build_ctob(uint32_t td_cmd,
 static inline int
 i40e_xmit_cleanup(struct i40e_tx_queue *txq)
 {
-   struct i40e_tx_entry *sw_ring = txq->sw_ring;
+   struct ci_tx_entry *sw_ring = txq->sw_ring;
volatile struct i40e_tx_desc *txd = txq->tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
@@ -1081,8 +1081,8 @@ uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
struct i40e_tx_queue *txq;
-   struct i40e_tx_entry *sw_ring;
-   struct i40e_tx_entry *txe, *txn;
+   struct ci_tx_entry *sw_ring;
+   struct ci_tx_entry *txe, *txn;
volatile struct i40e_tx_desc *txd;
volatile struct i40e_tx_desc *txr;
struct rte_mbuf *tx_pkt;
@@ -1331,7 +1331,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
 static __rte_always_inline int
 i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 {
-   struct i40e_tx_entry *txep;
+   struct ci_tx_entry *txep;
uint16_t tx_rs_thresh = txq->tx_rs_thresh;
uint16_t i = 0, j = 0;
struct rte_mbuf *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
@@ -1418,7 +1418,7 @@ i40e_tx_fill_hw_ring(struct i40e_tx_queue *txq,
 uint16_t nb_pkts)

[PATCH v1 09/21] net/ixgbe: use common Tx queue structure

2024-12-02 Thread Bruce Richardson
Merge in the additional fields used by the ixgbe driver and then convert
it over to use the common Tx queue structure.

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h| 14 +++-
 drivers/net/ixgbe/ixgbe_ethdev.c  |  4 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx.c| 64 +--
 drivers/net/ixgbe/ixgbe_rxtx.h| 56 ++--
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 26 
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   | 14 ++--
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c| 14 ++--
 8 files changed, 80 insertions(+), 114 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index c4a1a0c816..51ae3b051d 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -34,9 +34,13 @@ struct ci_tx_queue {
volatile struct i40e_tx_desc *i40e_tx_ring;
volatile struct iavf_tx_desc *iavf_tx_ring;
volatile struct ice_tx_desc *ice_tx_ring;
+   volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
};
volatile uint8_t *qtx_tail;   /* register address of tail */
-   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
+   union {
+   struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
+   struct ci_tx_entry_vec *sw_ring_vec;
+   };
rte_iova_t tx_ring_dma;/* TX ring DMA address */
uint16_t nb_tx_desc;   /* number of TX descriptors */
uint16_t tx_tail; /* current value of tail register */
@@ -87,6 +91,14 @@ struct ci_tx_queue {
uint8_t tc;
bool use_ctx;  /* with ctx info, each pkt needs two 
descriptors */
};
+   struct { /* ixgbe specific values */
+   const struct ixgbe_txq_ops *ops;
+   struct ixgbe_advctx_info *ctx_cache;
+   uint32_t ctx_curr;
+#ifdef RTE_LIB_SECURITY
+   uint8_t using_ipsec;  /**< indicates that IPsec TX 
feature is in use */
+#endif
+   };
};
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 8bee97d191..5f18fbaad5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1118,7 +1118,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void 
*init_params __rte_unused)
 * RX and TX function.
 */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
-   struct ixgbe_tx_queue *txq;
+   struct ci_tx_queue *txq;
/* TX queue function in primary, set by last queue initialized
 * Tx queue may not initialized by primary process
 */
@@ -1623,7 +1623,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 * RX function
 */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
-   struct ixgbe_tx_queue *txq;
+   struct ci_tx_queue *txq;
/* TX queue function in primary, set by last queue initialized
 * Tx queue may not initialized by primary process
 */
diff --git a/drivers/net/ixgbe/ixgbe_recycle_mbufs_vec_common.c 
b/drivers/net/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index a878db3150..3fd05ed5eb 100644
--- a/drivers/net/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -51,7 +51,7 @@ uint16_t
 ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-   struct ixgbe_tx_queue *txq = tx_queue;
+   struct ci_tx_queue *txq = tx_queue;
struct ci_tx_entry *txep;
struct rte_mbuf **rxep;
int i, n;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ca26cd132..f8f5f42e5c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -98,7 +98,7 @@
  * Return the total number of buffers freed.
  */
 static __rte_always_inline int
-ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
+ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 {
struct ci_tx_entry *txep;
uint32_t status;
@@ -195,7 +195,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf 
**pkts)
  * Copy mbuf pointers to the S/W ring.
  */
 static inline void
-ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
+ixgbe_tx_fill_hw_ring(struct ci_tx_queue *txq, struct rte_mbuf **pkts,
  uint16_t nb_pkts)
 {
volatile union ixgbe_adv_tx_desc *txdp = 
&txq->ixgbe_tx_ring[txq->tx_tail];
@@ -231,7 +231,7 @@ static inline uint16_t
 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t nb_pkts)
 {
-   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+   struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;

[PATCH v1 11/21] net/_common_intel: add post-Tx buffer free function

2024-12-02 Thread Bruce Richardson
The post-Tx buffer free actions in the SSE and AVX code paths of the
i40e, iavf and ice drivers are all common, so centralize them in the
common/intel_eth driver.
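
With the descriptor-done check passed in as a callback, each per-driver
free routine reduces to a thin wrapper; a sketch using the i40e naming
from the diff below:

    static __rte_always_inline int
    i40e_tx_free_bufs(struct ci_tx_queue *txq)
    {
            /* the common function handles fast-free and per-pool bulk return */
            return ci_tx_free_bufs(txq, i40e_tx_desc_done);
    }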

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h  | 71 
 drivers/net/i40e/i40e_rxtx_vec_common.h | 72 -
 drivers/net/iavf/iavf_rxtx_vec_common.h | 61 -
 drivers/net/ice/ice_rxtx_vec_common.h   | 61 -
 4 files changed, 98 insertions(+), 167 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index c372d2838b..a930309c05 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -7,6 +7,7 @@
 
 #include 
 #include 
+#include 
 
 /* forward declaration of the common intel (ci) queue structure */
 struct ci_tx_queue;
@@ -107,4 +108,74 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
rte_mbuf **tx_pkts, uint16_
txep[i].mbuf = tx_pkts[i];
 }
 
+#define IETH_VPMD_TX_MAX_FREE_BUF 64
+
+typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
+
+static __rte_always_inline int
+ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
+{
+   struct ci_tx_entry *txep;
+   uint32_t n;
+   uint32_t i;
+   int nb_free = 0;
+   struct rte_mbuf *m, *free[IETH_VPMD_TX_MAX_FREE_BUF];
+
+   /* check DD bits on threshold descriptor */
+   if (!desc_done(txq, txq->tx_next_dd))
+   return 0;
+
+   n = txq->tx_rs_thresh;
+
+/* first buffer to free from S/W ring is at index
+ * tx_next_dd - (tx_rs_thresh-1)
+ */
+   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+
+   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+   for (i = 0; i < n; i++) {
+   free[i] = txep[i].mbuf;
+   /* no need to reset txep[i].mbuf in vector path */
+   }
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
+   goto done;
+   }
+
+   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
+   if (likely(m != NULL)) {
+   free[0] = m;
+   nb_free = 1;
+   for (i = 1; i < n; i++) {
+   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+   if (likely(m != NULL)) {
+   if (likely(m->pool == free[0]->pool)) {
+   free[nb_free++] = m;
+   } else {
+   rte_mempool_put_bulk(free[0]->pool,
+(void *)free,
+nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+   }
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   } else {
+   for (i = 1; i < n; i++) {
+   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+   if (m != NULL)
+   rte_mempool_put(m->pool, m);
+   }
+   }
+
+done:
+   /* buffers were freed, update counters */
+   txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+   txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+   if (txq->tx_next_dd >= txq->nb_tx_desc)
+   txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+   return txq->tx_rs_thresh;
+}
+
 #endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h 
b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 57d6263ccf..907d32dd0b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -16,72 +16,18 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
+static inline int
+i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
+{
+   return (txq->i40e_tx_ring[idx].cmd_type_offset_bsz &
+   rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
+   rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
+}
+
 static __rte_always_inline int
 i40e_tx_free_bufs(struct ci_tx_queue *txq)
 {
-   struct ci_tx_entry *txep;
-   uint32_t n;
-   uint32_t i;
-   int nb_free = 0;
-   struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
-
-   /* check DD bits on threshold descriptor */
-   if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-   rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
-   rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
-   return 0;
-
-   n = txq->tx_rs_thresh;
-
-/* first buffer to free from S/W ring is at index
- * tx_next_dd - (tx_rs_thresh-1)
- */
-   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];

[PATCH v1 07/21] net/iavf: use common Tx queue structure

2024-12-02 Thread Bruce Richardson
Merge in the few additional fields used by the iavf driver and convert it
to use the common Tx queue structure as well.

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h  | 15 +++-
 drivers/net/iavf/iavf.h |  2 +-
 drivers/net/iavf/iavf_ethdev.c  |  4 +-
 drivers/net/iavf/iavf_rxtx.c| 42 ++---
 drivers/net/iavf/iavf_rxtx.h| 49 +++--
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   |  4 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 14 +++
 drivers/net/iavf/iavf_rxtx_vec_common.h |  8 ++--
 drivers/net/iavf/iavf_rxtx_vec_sse.c|  8 ++--
 drivers/net/iavf/iavf_vchnl.c   |  6 +--
 10 files changed, 62 insertions(+), 90 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index c965f5ee6c..c4a1a0c816 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -31,8 +31,9 @@ typedef void (*ice_tx_release_mbufs_t)(struct ci_tx_queue 
*txq);
 
 struct ci_tx_queue {
union { /* TX ring virtual address */
-   volatile struct ice_tx_desc *ice_tx_ring;
volatile struct i40e_tx_desc *i40e_tx_ring;
+   volatile struct iavf_tx_desc *iavf_tx_ring;
+   volatile struct ice_tx_desc *ice_tx_ring;
};
volatile uint8_t *qtx_tail;   /* register address of tail */
struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
@@ -63,8 +64,9 @@ struct ci_tx_queue {
bool tx_deferred_start; /* don't start this queue in dev start */
bool q_set; /* indicate if tx queue has been configured */
union {  /* the VSI this queue belongs to */
-   struct ice_vsi *ice_vsi;
struct i40e_vsi *i40e_vsi;
+   struct iavf_vsi *iavf_vsi;
+   struct ice_vsi *ice_vsi;
};
const struct rte_memzone *mz;
 
@@ -76,6 +78,15 @@ struct ci_tx_queue {
struct { /* I40E driver specific values */
uint8_t dcb_tc;
};
+   struct { /* iavf driver specific values */
+   uint16_t ipsec_crypto_pkt_md_offset;
+   uint8_t rel_mbufs_type;
+#define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(0)
+#define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(1)
+   uint8_t vlan_flag;
+   uint8_t tc;
+   bool use_ctx;  /* with ctx info, each pkt needs two 
descriptors */
+   };
};
 };
 
diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index ad526c644c..956c60ef45 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -98,7 +98,7 @@
 
 struct iavf_adapter;
 struct iavf_rx_queue;
-struct iavf_tx_queue;
+struct ci_tx_queue;
 
 
 struct iavf_ipsec_crypto_stats {
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 7f80cd6258..328c224c93 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -954,7 +954,7 @@ static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
struct iavf_rx_queue *rxq;
-   struct iavf_tx_queue *txq;
+   struct ci_tx_queue *txq;
int i;
uint16_t nb_txq, nb_rxq;
 
@@ -1885,7 +1885,7 @@ iavf_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
struct iavf_mbuf_stats *mbuf_stats)
 {
uint16_t idx;
-   struct iavf_tx_queue *txq;
+   struct ci_tx_queue *txq;
 
for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
txq = ethdev->data->tx_queues[idx];
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 6eda91e76b..7e381b2a17 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -213,7 +213,7 @@ check_rx_vec_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline bool
-check_tx_vec_allow(struct iavf_tx_queue *txq)
+check_tx_vec_allow(struct ci_tx_queue *txq)
 {
if (!(txq->offloads & IAVF_TX_NO_VECTOR_FLAGS) &&
txq->tx_rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
@@ -282,7 +282,7 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_tx_queue(struct iavf_tx_queue *txq)
+reset_tx_queue(struct ci_tx_queue *txq)
 {
struct ci_tx_entry *txe;
uint32_t i, size;
@@ -388,7 +388,7 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-release_txq_mbufs(struct iavf_tx_queue *txq)
+release_txq_mbufs(struct ci_tx_queue *txq)
 {
uint16_t i;
 
@@ -778,7 +778,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
struct iavf_info *vf =
IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
struct iavf_vsi *vsi = &vf->vsi;
-   struct iavf_tx_queue *txq;
+   struct ci_tx_queue *txq;
const struct rte_memzone *mz;
uint32_t ring_size;
uint16_t tx_rs_thresh, tx_free_thresh;
@@ -814,7 +814,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,

[PATCH v1 08/21] net/ixgbe: convert Tx queue context cache field to ptr

2024-12-02 Thread Bruce Richardson
Rather than having a two-element array of context cache values inside
the Tx queue structure, convert it to a pointer to a cache placed at the
end of the structure. This makes future merging of the structure easier,
as we don't need the "ixgbe_advctx_info" struct defined when defining a
combined queue structure.
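
The allocation pattern, in general form (a sketch of the change below,
with error handling abbreviated):

    txq = rte_zmalloc_socket("ethdev TX queue",
                    sizeof(*txq) + sizeof(struct ixgbe_advctx_info) * IXGBE_CTX_NUM,
                    RTE_CACHE_LINE_SIZE, socket_id);
    if (txq == NULL)
            return -ENOMEM;
    /* the cache array lives in the same allocation, right after the struct */
    txq->ctx_cache = RTE_PTR_ADD(txq, sizeof(*txq));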

Signed-off-by: Bruce Richardson 
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 7 ---
 drivers/net/ixgbe/ixgbe_rxtx.h | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index f7ddbba1b6..2ca26cd132 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2522,8 +2522,7 @@ ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
txq->ctx_curr = 0;
-   memset((void *)&txq->ctx_cache, 0,
-   IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
+   memset(txq->ctx_cache, 0, IXGBE_CTX_NUM * sizeof(struct 
ixgbe_advctx_info));
 }
 
 static const struct ixgbe_txq_ops def_txq_ops = {
@@ -2741,10 +2740,12 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
 
/* First allocate the tx queue data structure */
-   txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct 
ixgbe_tx_queue),
+   txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct 
ixgbe_tx_queue) +
+   sizeof(struct ixgbe_advctx_info) * 
IXGBE_CTX_NUM,
 RTE_CACHE_LINE_SIZE, socket_id);
if (txq == NULL)
return -ENOMEM;
+   txq->ctx_cache = RTE_PTR_ADD(txq, sizeof(struct ixgbe_tx_queue));
 
/*
 * Allocate TX ring hardware descriptors. A memzone large enough to
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index f6bae37cf3..847cacf7b5 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -215,8 +215,8 @@ struct ixgbe_tx_queue {
uint8_t wthresh;   /**< Write-back threshold reg. */
uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
uint32_tctx_curr;  /**< Hardware context states. */
-   /** Hardware context0 history. */
-   struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
+   /** Hardware context history. */
+   struct ixgbe_advctx_info *ctx_cache;
const struct ixgbe_txq_ops *ops;   /**< txq ops */
booltx_deferred_start; /**< not in global dev start. */
 #ifdef RTE_LIB_SECURITY
-- 
2.43.0



[PATCH v1 10/21] net/_common_intel: pack Tx queue structure

2024-12-02 Thread Bruce Richardson
Move some fields around to better pack the Tx queue structure and make
sure all data used by the vector codepaths is on the first cacheline of
the structure. Checking with "pahole" on a 64-bit build, only one 6-byte
hole is left in the structure - on the second cacheline - after this
patch.

As part of the reordering, move the p/h/wthresh values to the
ixgbe-specific part of the union, as that is the only driver which
actually uses those values. The i40e and ice drivers just record the
values for later return, so we can drop them from the Tx queue structure
for those drivers and just report the defaults in all cases.
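
The layout can be re-checked after any future change by running pahole on
a binary built with debug info, e.g. (the path here is an assumption):

    $ pahole -C ci_tx_queue build/app/dpdk-testpmd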

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h | 12 +---
 drivers/net/i40e/i40e_rxtx.c   |  9 +++--
 drivers/net/ice/ice_rxtx.c |  9 +++--
 3 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 51ae3b051d..c372d2838b 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -41,7 +41,6 @@ struct ci_tx_queue {
struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
struct ci_tx_entry_vec *sw_ring_vec;
};
-   rte_iova_t tx_ring_dma;/* TX ring DMA address */
uint16_t nb_tx_desc;   /* number of TX descriptors */
uint16_t tx_tail; /* current value of tail register */
uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
@@ -55,16 +54,14 @@ struct ci_tx_queue {
uint16_t tx_free_thresh;
/* Number of TX descriptors to use before RS bit is set. */
uint16_t tx_rs_thresh;
-   uint8_t pthresh;   /**< Prefetch threshold register. */
-   uint8_t hthresh;   /**< Host threshold register. */
-   uint8_t wthresh;   /**< Write-back threshold reg. */
uint16_t port_id;  /* Device port identifier. */
uint16_t queue_id; /* TX queue index. */
uint16_t reg_idx;
-   uint64_t offloads;
uint16_t tx_next_dd;
uint16_t tx_next_rs;
+   uint64_t offloads;
uint64_t mbuf_errors;
+   rte_iova_t tx_ring_dma;/* TX ring DMA address */
bool tx_deferred_start; /* don't start this queue in dev start */
bool q_set; /* indicate if tx queue has been configured */
union {  /* the VSI this queue belongs to */
@@ -95,9 +92,10 @@ struct ci_tx_queue {
const struct ixgbe_txq_ops *ops;
struct ixgbe_advctx_info *ctx_cache;
uint32_t ctx_curr;
-#ifdef RTE_LIB_SECURITY
+   uint8_t pthresh;   /**< Prefetch threshold register. */
+   uint8_t hthresh;   /**< Host threshold register. */
+   uint8_t wthresh;   /**< Write-back threshold reg. */
uint8_t using_ipsec;  /**< indicates that IPsec TX 
feature is in use */
-#endif
};
};
 };
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 305bc53480..539b170266 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2539,9 +2539,6 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
txq->tx_free_thresh = tx_free_thresh;
-   txq->pthresh = tx_conf->tx_thresh.pthresh;
-   txq->hthresh = tx_conf->tx_thresh.hthresh;
-   txq->wthresh = tx_conf->tx_thresh.wthresh;
txq->queue_id = queue_idx;
txq->reg_idx = reg_idx;
txq->port_id = dev->data->port_id;
@@ -3310,9 +3307,9 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t 
queue_id,
 
qinfo->nb_desc = txq->nb_tx_desc;
 
-   qinfo->conf.tx_thresh.pthresh = txq->pthresh;
-   qinfo->conf.tx_thresh.hthresh = txq->hthresh;
-   qinfo->conf.tx_thresh.wthresh = txq->wthresh;
+   qinfo->conf.tx_thresh.pthresh = I40E_DEFAULT_TX_PTHRESH;
+   qinfo->conf.tx_thresh.hthresh = I40E_DEFAULT_TX_HTHRESH;
+   qinfo->conf.tx_thresh.wthresh = I40E_DEFAULT_TX_WTHRESH;
 
qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index bcc7c7a016..e2e147ba3e 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1492,9 +1492,6 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
txq->tx_free_thresh = tx_free_thresh;
-   txq->pthresh = tx_conf->tx_thresh.pthresh;
-   txq->hthresh = tx_conf->tx_thresh.hthresh;
-   txq->wthresh = tx_conf->tx_thresh.wthresh;
txq->queue_id = queue_idx;
 
txq->reg_idx = vsi->base_queue + queue_idx;
@@ -1583,9 +1580,9 @@ ice_txq_info_get(struct rte_eth_dev *dev, uint16_t 
queue_id,
 
qinfo->nb_desc = txq->nb_tx_desc;
 
-   qinfo->conf.tx_thresh.pthresh = txq->pthresh;

[PATCH v1 14/21] net/ice: move Tx queue mbuf cleanup fn to common

2024-12-02 Thread Bruce Richardson
The function to loop over the Tx queue and clean up all the mbufs on
it, e.g. for queue shutdown, is not device specific and so can move into
the common_intel headers. The only complication is ensuring that the
correct ring format, either minimal vector or full structure, is used.
The ice driver currently uses two functions and a function pointer to
help with this - and one of those functions performs a further check
internally - so we can simplify this down to just one common function,
with a flag set in the appropriate place. This avoids checking for
AVX-512-specific functions, which were the only ones using the smaller
struct in this driver.
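
In outline, a driver records the ring format once when starting the
queue, and the common function then picks the right cleanup loop; a
sketch following the usage later in this series:

    /* at queue start: record vector use and SW ring layout */
    txq->vector_tx = ad->tx_vec_allowed;
    txq->vector_sw_ring = ad->tx_use_avx512;

    /* at queue stop/release: one call, no function pointer needed */
    ci_txq_release_all_mbufs(txq);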

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h| 49 -
 drivers/net/ice/ice_dcf_ethdev.c  |  5 +--
 drivers/net/ice/ice_ethdev.h  |  3 +-
 drivers/net/ice/ice_rxtx.c| 33 +
 drivers/net/ice/ice_rxtx_vec_common.h | 51 ---
 drivers/net/ice/ice_rxtx_vec_sse.c|  4 +--
 6 files changed, 61 insertions(+), 84 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 21f4d71e50..2a34ec267d 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -65,6 +65,8 @@ struct ci_tx_queue {
rte_iova_t tx_ring_dma;/* TX ring DMA address */
bool tx_deferred_start; /* don't start this queue in dev start */
bool q_set; /* indicate if tx queue has been configured */
+   bool vector_tx; /* port is using vector TX */
+   bool vector_sw_ring;/* port is using vectorized SW ring 
(ieth_tx_entry_vec) */
union {  /* the VSI this queue belongs to */
struct i40e_vsi *i40e_vsi;
struct iavf_vsi *iavf_vsi;
@@ -74,7 +76,6 @@ struct ci_tx_queue {
 
union {
struct { /* ICE driver specific values */
-   ice_tx_release_mbufs_t tx_rel_mbufs;
uint32_t q_teid; /* TX schedule node id. */
};
struct { /* I40E driver specific values */
@@ -271,4 +272,50 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, 
ci_desc_done_fn desc_done, bool ctx
return txq->tx_rs_thresh;
 }
 
+#define IETH_FREE_BUFS_LOOP(txq, swr, start) do { \
+   uint16_t i = start; \
+   if (txq->tx_tail < i) { \
+   for (; i < txq->nb_tx_desc; i++) { \
+   rte_pktmbuf_free_seg(swr[i].mbuf); \
+   swr[i].mbuf = NULL; \
+   } \
+   i = 0; \
+   } \
+   for (; i < txq->tx_tail; i++) { \
+   rte_pktmbuf_free_seg(swr[i].mbuf); \
+   swr[i].mbuf = NULL; \
+   } \
+} while (0)
+
+static inline void
+ci_txq_release_all_mbufs(struct ci_tx_queue *txq)
+{
+   if (unlikely(!txq || !txq->sw_ring))
+   return;
+
+   if (!txq->vector_tx) {
+   for (uint16_t i = 0; i < txq->nb_tx_desc; i++) {
+   if (txq->sw_ring[i].mbuf != NULL) {
+   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+   txq->sw_ring[i].mbuf = NULL;
+   }
+   }
+   return;
+   }
+
+   /**
+*  vPMD tx will not set sw_ring's mbuf to NULL after free,
+*  so need to free remains more carefully.
+*/
+   const uint16_t start = txq->tx_next_dd - txq->tx_rs_thresh + 1;
+
+   if (txq->vector_sw_ring) {
+   struct ci_tx_entry_vec *swr = txq->sw_ring_vec;
+   IETH_FREE_BUFS_LOOP(txq, swr, start);
+   } else {
+   struct ci_tx_entry *swr = txq->sw_ring;
+   IETH_FREE_BUFS_LOOP(txq, swr, start);
+   }
+}
+
 #endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index a0c065d78c..c20399cd84 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -24,6 +24,7 @@
 #include "ice_generic_flow.h"
 #include "ice_dcf_ethdev.h"
 #include "ice_rxtx.h"
+#include "_common_intel/tx.h"
 
 #define DCF_NUM_MACADDR_MAX  64
 
@@ -500,7 +501,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
txq = dev->data->tx_queues[tx_queue_id];
-   txq->tx_rel_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
reset_tx_queue(txq);
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -650,7 +651,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
txq = dev->data->tx_queues[i];
if (!txq)
continue;
-   txq->tx_rel_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
reset_tx_queue(txq);
dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

[PATCH v1 12/21] net/_common_intel: add Tx buffer free fn for AVX-512

2024-12-02 Thread Bruce Richardson
The AVX-512 code paths for the ice and i40e drivers are common, and
differ from the regular post-Tx free function in that the SW ring from
which the buffers are freed contains nothing other than the mbuf pointer.
Merge these into a common function in intel_common to reduce duplication.
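
Driver call sites then differ only in the descriptor-done callback they
pass in; a sketch of the intended use (callback names as used elsewhere
in this series):

    if (txq->nb_tx_free < txq->tx_free_thresh)
            ci_tx_free_bufs_vec(txq, ice_tx_desc_done);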

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h  |  93 +++
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 114 +--
 drivers/net/ice/ice_rxtx_vec_avx512.c   | 117 +---
 3 files changed, 95 insertions(+), 229 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index a930309c05..145501834a 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -178,4 +178,97 @@ ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn 
desc_done)
return txq->tx_rs_thresh;
 }
 
+static __rte_always_inline int
+ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
+{
+   int nb_free = 0;
+   struct rte_mbuf *free[IETH_VPMD_TX_MAX_FREE_BUF];
+   struct rte_mbuf *m;
+
+   /* check DD bits on threshold descriptor */
+   if (!desc_done(txq, txq->tx_next_dd))
+   return 0;
+
+   const uint32_t n = txq->tx_rs_thresh;
+
+   /* first buffer to free from S/W ring is at index
+* tx_next_dd - (tx_rs_thresh - 1)
+*/
+   struct ci_tx_entry_vec *txep = txq->sw_ring_vec;
+   txep += txq->tx_next_dd - (n - 1);
+
+   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) 
{
+   struct rte_mempool *mp = txep[0].mbuf->pool;
+   void **cache_objs;
+   struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
+   rte_lcore_id());
+
+   if (!cache || cache->len == 0)
+   goto normal;
+
+   cache_objs = &cache->objs[cache->len];
+
+   if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+   rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
+   goto done;
+   }
+
+   /* The cache follows the following algorithm
+*   1. Add the objects to the cache
+*   2. Anything greater than the cache min value (if it
+*   crosses the cache flush threshold) is flushed to the ring.
+*/
+   /* Add elements back into the cache */
+   uint32_t copied = 0;
+   /* n is multiple of 32 */
+   while (copied < n) {
+   memcpy(&cache_objs[copied], &txep[copied], 32 * 
sizeof(void *));
+   copied += 32;
+   }
+   cache->len += n;
+
+   if (cache->len >= cache->flushthresh) {
+   rte_mempool_ops_enqueue_bulk(mp, 
&cache->objs[cache->size],
+cache->len - cache->size);
+   cache->len = cache->size;
+   }
+   goto done;
+   }
+
+normal:
+   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
+   if (likely(m)) {
+   free[0] = m;
+   nb_free = 1;
+   for (uint32_t i = 1; i < n; i++) {
+   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+   if (likely(m)) {
+   if (likely(m->pool == free[0]->pool)) {
+   free[nb_free++] = m;
+   } else {
+   rte_mempool_put_bulk(free[0]->pool, 
(void *)free, nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+   }
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   } else {
+   for (uint32_t i = 1; i < n; i++) {
+   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+   if (m)
+   rte_mempool_put(m->pool, m);
+   }
+   }
+
+done:
+   /* buffers were freed, update counters */
+   txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+   txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+   if (txq->tx_next_dd >= txq->nb_tx_desc)
+   txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+   return txq->tx_rs_thresh;
+}
+
 #endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c 
b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index a3f6d1667f..9bb2a44231 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -754,118 +754,6 @@ i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
rx_pkts + retval, nb_pkts);
 }
 
-static __rte_always_inline int
-i40e_tx_free_bufs_avx512(struct ci_tx_queue *txq)

[PATCH v1 15/21] net/i40e: use common Tx queue mbuf cleanup fn

2024-12-02 Thread Bruce Richardson
Update driver to be similar to the "ice" driver and use the common mbuf
ring cleanup code on shutdown of a Tx queue.

Signed-off-by: Bruce Richardson 
---
 drivers/net/i40e/i40e_ethdev.h |  4 +-
 drivers/net/i40e/i40e_rxtx.c   | 70 --
 drivers/net/i40e/i40e_rxtx.h   |  1 -
 3 files changed, 9 insertions(+), 66 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index d351193ed9..ccc8732d7d 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1260,12 +1260,12 @@ struct i40e_adapter {
 
/* For RSS reta table update */
uint8_t rss_reta_updated;
-#ifdef RTE_ARCH_X86
+
+   /* used only on x86, zero on other architectures */
bool rx_use_avx2;
bool rx_use_avx512;
bool tx_use_avx2;
bool tx_use_avx512;
-#endif
 };
 
 /**
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 539b170266..b70919c5dc 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1875,6 +1875,7 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
int err;
struct ci_tx_queue *txq;
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   const struct i40e_adapter *ad = 
I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
PMD_INIT_FUNC_TRACE();
 
@@ -1889,6 +1890,9 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
PMD_DRV_LOG(WARNING, "TX queue %u is deferred start",
tx_queue_id);
 
+   txq->vector_tx = ad->tx_vec_allowed;
+   txq->vector_sw_ring = ad->tx_use_avx512;
+
/*
 * tx_queue_id is queue id application refers to, while
 * rxq->reg_idx is the real queue index.
@@ -1929,7 +1933,7 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
return err;
}
 
-   i40e_tx_queue_release_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
i40e_reset_tx_queue(txq);
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -2604,7 +2608,7 @@ i40e_tx_queue_release(void *txq)
return;
}
 
-   i40e_tx_queue_release_mbufs(q);
+   ci_txq_release_all_mbufs(q);
rte_free(q->sw_ring);
rte_memzone_free(q->mz);
rte_free(q);
@@ -2701,66 +2705,6 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
rxq->rxrearm_nb = 0;
 }
 
-void
-i40e_tx_queue_release_mbufs(struct ci_tx_queue *txq)
-{
-   struct rte_eth_dev *dev;
-   uint16_t i;
-
-   if (!txq || !txq->sw_ring) {
-   PMD_DRV_LOG(DEBUG, "Pointer to txq or sw_ring is NULL");
-   return;
-   }
-
-   dev = &rte_eth_devices[txq->port_id];
-
-   /**
-*  vPMD tx will not set sw_ring's mbuf to NULL after free,
-*  so need to free remains more carefully.
-*/
-#ifdef CC_AVX512_SUPPORT
-   if (dev->tx_pkt_burst == i40e_xmit_pkts_vec_avx512) {
-   struct ci_tx_entry_vec *swr = (void *)txq->sw_ring;
-
-   i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
-   if (txq->tx_tail < i) {
-   for (; i < txq->nb_tx_desc; i++) {
-   rte_pktmbuf_free_seg(swr[i].mbuf);
-   swr[i].mbuf = NULL;
-   }
-   i = 0;
-   }
-   for (; i < txq->tx_tail; i++) {
-   rte_pktmbuf_free_seg(swr[i].mbuf);
-   swr[i].mbuf = NULL;
-   }
-   return;
-   }
-#endif
-   if (dev->tx_pkt_burst == i40e_xmit_pkts_vec_avx2 ||
-   dev->tx_pkt_burst == i40e_xmit_pkts_vec) {
-   i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
-   if (txq->tx_tail < i) {
-   for (; i < txq->nb_tx_desc; i++) {
-   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
-   txq->sw_ring[i].mbuf = NULL;
-   }
-   i = 0;
-   }
-   for (; i < txq->tx_tail; i++) {
-   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
-   txq->sw_ring[i].mbuf = NULL;
-   }
-   } else {
-   for (i = 0; i < txq->nb_tx_desc; i++) {
-   if (txq->sw_ring[i].mbuf) {
-   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
-   txq->sw_ring[i].mbuf = NULL;
-   }
-   }
-   }
-}
-
 static int
 i40e_tx_done_cleanup_full(struct ci_tx_queue *txq,
uint32_t free_cnt)
@@ -3127,7 +3071,7 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_tx_queues; i++) {
if (!dev->data->tx_queues[i])
 

[PATCH v1 16/21] net/ixgbe: use common Tx queue mbuf cleanup fn

2024-12-02 Thread Bruce Richardson
Update the driver to use the common cleanup function.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ixgbe/ixgbe_rxtx.c| 22 +++---
 drivers/net/ixgbe/ixgbe_rxtx.h|  1 -
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 28 ++-
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  7 --
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  7 --
 5 files changed, 5 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index f8f5f42e5c..5ab62808a0 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2334,21 +2334,6 @@ ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct 
rte_mbuf **rx_pkts,
  *
  **/
 
-static void __rte_cold
-ixgbe_tx_queue_release_mbufs(struct ci_tx_queue *txq)
-{
-   unsigned i;
-
-   if (txq->sw_ring != NULL) {
-   for (i = 0; i < txq->nb_tx_desc; i++) {
-   if (txq->sw_ring[i].mbuf != NULL) {
-   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
-   txq->sw_ring[i].mbuf = NULL;
-   }
-   }
-   }
-}
-
 static int
 ixgbe_tx_done_cleanup_full(struct ci_tx_queue *txq, uint32_t free_cnt)
 {
@@ -2472,7 +2457,7 @@ static void __rte_cold
 ixgbe_tx_queue_release(struct ci_tx_queue *txq)
 {
if (txq != NULL && txq->ops != NULL) {
-   txq->ops->release_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
txq->ops->free_swring(txq);
rte_memzone_free(txq->mz);
rte_free(txq);
@@ -2526,7 +2511,6 @@ ixgbe_reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static const struct ixgbe_txq_ops def_txq_ops = {
-   .release_mbufs = ixgbe_tx_queue_release_mbufs,
.free_swring = ixgbe_tx_free_swring,
.reset = ixgbe_reset_tx_queue,
 };
@@ -3380,7 +3364,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
struct ci_tx_queue *txq = dev->data->tx_queues[i];
 
if (txq != NULL) {
-   txq->ops->release_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
txq->ops->reset(txq);
dev->data->tx_queue_state[i] = 
RTE_ETH_QUEUE_STATE_STOPPED;
}
@@ -5655,7 +5639,7 @@ ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
if (txq->ops != NULL) {
-   txq->ops->release_mbufs(txq);
+   ci_txq_release_all_mbufs(txq);
txq->ops->reset(txq);
}
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 4333e5bf2f..11689eb432 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -181,7 +181,6 @@ struct ixgbe_advctx_info {
 };
 
 struct ixgbe_txq_ops {
-   void (*release_mbufs)(struct ci_tx_queue *txq);
void (*free_swring)(struct ci_tx_queue *txq);
void (*reset)(struct ci_tx_queue *txq);
 };
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
index 81fd8bb64d..65794e45cb 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
@@ -78,32 +78,6 @@ tx_backlog_entry(struct ci_tx_entry_vec *txep,
txep[i].mbuf = tx_pkts[i];
 }
 
-static inline void
-_ixgbe_tx_queue_release_mbufs_vec(struct ci_tx_queue *txq)
-{
-   unsigned int i;
-   struct ci_tx_entry_vec *txe;
-   const uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);
-
-   if (txq->sw_ring == NULL || txq->nb_tx_free == max_desc)
-   return;
-
-   /* release the used mbufs in sw_ring */
-   for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
-i != txq->tx_tail;
-i = (i + 1) % txq->nb_tx_desc) {
-   txe = &txq->sw_ring_vec[i];
-   rte_pktmbuf_free_seg(txe->mbuf);
-   }
-   txq->nb_tx_free = max_desc;
-
-   /* reset tx_entry */
-   for (i = 0; i < txq->nb_tx_desc; i++) {
-   txe = &txq->sw_ring_vec[i];
-   txe->mbuf = NULL;
-   }
-}
-
 static inline void
 _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
 {
@@ -208,6 +182,8 @@ ixgbe_txq_vec_setup_default(struct ci_tx_queue *txq,
/* leave the first one for overflow */
txq->sw_ring_vec = txq->sw_ring_vec + 1;
txq->ops = txq_ops;
+   txq->vector_tx = 1;
+   txq->vector_sw_ring = 1;
 
return 0;
 }
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index cb749a3760..2ccb399b64 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -633,12 +633,6 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 

[PATCH v1 17/21] net/iavf: use common Tx queue mbuf cleanup fn

2024-12-02 Thread Bruce Richardson
Adjust the iavf driver to also use the common mbuf freeing functions on
Tx queue release/cleanup. The implementation is complicated a little by
the need to integrate the additional "use_ctx" parameter for the iavf
code, but the changes in the other drivers are minimal - just a constant
"false" parameter.
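
The call sites then look as follows (the iavf line is a sketch - whether
it passes the per-queue flag directly is an assumption based on the
use_ctx field added earlier in this series):

    ci_txq_release_all_mbufs(txq, false);        /* i40e, ice, ixgbe */
    ci_txq_release_all_mbufs(txq, txq->use_ctx); /* iavf */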

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h  | 27 +-
 drivers/net/i40e/i40e_rxtx.c|  6 ++--
 drivers/net/iavf/iavf_rxtx.c| 37 ++---
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 24 ++--
 drivers/net/iavf/iavf_rxtx_vec_common.h | 18 
 drivers/net/iavf/iavf_rxtx_vec_sse.c|  9 ++
 drivers/net/ice/ice_dcf_ethdev.c|  4 +--
 drivers/net/ice/ice_rxtx.c  |  6 ++--
 drivers/net/ixgbe/ixgbe_rxtx.c  |  6 ++--
 9 files changed, 31 insertions(+), 106 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 2a34ec267d..279eb6ea67 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -272,23 +272,23 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, 
ci_desc_done_fn desc_done, bool ctx
return txq->tx_rs_thresh;
 }
 
-#define IETH_FREE_BUFS_LOOP(txq, swr, start) do { \
+#define IETH_FREE_BUFS_LOOP(swr, nb_desc, start, end) do { \
uint16_t i = start; \
-   if (txq->tx_tail < i) { \
-   for (; i < txq->nb_tx_desc; i++) { \
+   if (end < i) { \
+   for (; i < nb_desc; i++) { \
rte_pktmbuf_free_seg(swr[i].mbuf); \
swr[i].mbuf = NULL; \
} \
i = 0; \
} \
-   for (; i < txq->tx_tail; i++) { \
+   for (; i < end; i++) { \
rte_pktmbuf_free_seg(swr[i].mbuf); \
swr[i].mbuf = NULL; \
} \
 } while (0)
 
 static inline void
-ci_txq_release_all_mbufs(struct ci_tx_queue *txq)
+ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
 {
if (unlikely(!txq || !txq->sw_ring))
return;
@@ -307,15 +307,14 @@ ci_txq_release_all_mbufs(struct ci_tx_queue *txq)
 *  vPMD tx will not set sw_ring's mbuf to NULL after free,
 *  so need to free remains more carefully.
 */
-   const uint16_t start = txq->tx_next_dd - txq->tx_rs_thresh + 1;
-
-   if (txq->vector_sw_ring) {
-   struct ci_tx_entry_vec *swr = txq->sw_ring_vec;
-   IETH_FREE_BUFS_LOOP(txq, swr, start);
-   } else {
-   struct ci_tx_entry *swr = txq->sw_ring;
-   IETH_FREE_BUFS_LOOP(txq, swr, start);
-   }
+   const uint16_t start = (txq->tx_next_dd - txq->tx_rs_thresh + 1) >> 
use_ctx;
+   const uint16_t nb_desc = txq->nb_tx_desc >> use_ctx;
+   const uint16_t end = txq->tx_tail >> use_ctx;
+
+   if (txq->vector_sw_ring)
+   IETH_FREE_BUFS_LOOP(txq->sw_ring_vec, nb_desc, start, end);
+   else
+   IETH_FREE_BUFS_LOOP(txq->sw_ring, nb_desc, start, end);
 }
 
 #endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index b70919c5dc..081d743e62 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1933,7 +1933,7 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
return err;
}
 
-   ci_txq_release_all_mbufs(txq);
+   ci_txq_release_all_mbufs(txq, false);
i40e_reset_tx_queue(txq);
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -2608,7 +2608,7 @@ i40e_tx_queue_release(void *txq)
return;
}
 
-   ci_txq_release_all_mbufs(q);
+   ci_txq_release_all_mbufs(q, false);
rte_free(q->sw_ring);
rte_memzone_free(q->mz);
rte_free(q);
@@ -3071,7 +3071,7 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_tx_queues; i++) {
if (!dev->data->tx_queues[i])
continue;
-   ci_txq_release_all_mbufs(dev->data->tx_queues[i]);
+   ci_txq_release_all_mbufs(dev->data->tx_queues[i], false);
i40e_reset_tx_queue(dev->data->tx_queues[i]);
}
 
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 7e381b2a17..f0ab881ac5 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -387,24 +387,6 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
rxq->rx_nb_avail = 0;
 }
 
-static inline void
-release_txq_mbufs(struct ci_tx_queue *txq)
-{
-   uint16_t i;
-
-   if (!txq || !txq->sw_ring) {
-   PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
-   return;
-   }
-
-   for (i = 0; i < txq->nb_tx_desc; i++) {
-   if (txq->sw_ring[i].mbuf) {

[PATCH v1 20/21] net/iavf: use vector SW ring for all vector paths

2024-12-02 Thread Bruce Richardson
The AVX-512 code path used a smaller SW ring structure containing only
the mbuf pointer and no other fields. The other fields are only used in
the scalar code path, so update all vector driver code paths (AVX2, SSE)
to use the smaller, faster structure.
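
For reference, the two entry formats from the common header, as defined
earlier in this series; the vector variant drops the scalar-only linkage
fields:

    struct ci_tx_entry {            /* scalar paths */
            struct rte_mbuf *mbuf;
            uint16_t next_id;       /* Index of next descriptor in ring. */
            uint16_t last_id;       /* Index of last scattered descriptor. */
    };

    struct ci_tx_entry_vec {        /* vector paths: mbuf pointer only */
            struct rte_mbuf *mbuf;
    };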

Signed-off-by: Bruce Richardson 
---
 drivers/net/iavf/iavf_rxtx.c|  7 ---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 12 ++--
 drivers/net/iavf/iavf_rxtx_vec_avx512.c |  8 
 drivers/net/iavf/iavf_rxtx_vec_common.h |  6 --
 drivers/net/iavf/iavf_rxtx_vec_sse.c| 14 +++---
 5 files changed, 13 insertions(+), 34 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index f0ab881ac5..6692f6992b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -4193,14 +4193,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
txq = dev->data->tx_queues[i];
if (!txq)
continue;
-#ifdef CC_AVX512_SUPPORT
-   if (use_avx512)
-   iavf_txq_vec_setup_avx512(txq);
-   else
-   iavf_txq_vec_setup(txq);
-#else
iavf_txq_vec_setup(txq);
-#endif
}
 
if (no_poll_on_link_down) {
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c 
b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index fdb98b417a..b847886081 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -1736,14 +1736,14 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 {
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
volatile struct iavf_tx_desc *txdp;
-   struct ci_tx_entry *txep;
+   struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
 
if (txq->nb_tx_free < txq->tx_free_thresh)
-   iavf_tx_free_bufs(txq);
+   ci_tx_free_bufs_vec(txq, iavf_tx_desc_done, false);
 
nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -1752,13 +1752,13 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
tx_id = txq->tx_tail;
txdp = &txq->iavf_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
 
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   ci_tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, n);
 
iavf_vtx(txdp, tx_pkts, n - 1, flags, offload);
tx_pkts += (n - 1);
@@ -1773,10 +1773,10 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
/* avoid reach the end of ring */
txdp = &txq->iavf_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
}
 
-   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
 
iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload);
 
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c 
b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 007759e451..641f3311eb 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -2357,14 +2357,6 @@ iavf_xmit_pkts_vec_avx512(void *tx_queue, struct 
rte_mbuf **tx_pkts,
return iavf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts, false);
 }
 
-int __rte_cold
-iavf_txq_vec_setup_avx512(struct ci_tx_queue *txq)
-{
-   txq->vector_tx = true;
-   txq->vector_sw_ring = true;
-   return 0;
-}
-
 uint16_t
 iavf_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts)
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h 
b/drivers/net/iavf/iavf_rxtx_vec_common.h
index 6f94587eee..c69399a173 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -24,12 +24,6 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
 }
 
-static __rte_always_inline int
-iavf_tx_free_bufs(struct ci_tx_queue *txq)
-{
-   return ci_tx_free_bufs(txq, iavf_tx_desc_done);
-}
-
 static inline void
 _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 {
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c 
b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 3adf2a59e4..9f7db80bfd 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -1368,14 +1368,14 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,

[PATCH v1 21/21] net/ixgbe: use common Tx backlog entry fn

2024-12-02 Thread Bruce Richardson
Remove the custom vector Tx backlog entry function and use the standard
intel_common one, now that all vector drivers are using the same,
smaller ring structure.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 10 --
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  4 ++--
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  4 ++--
 3 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
index 65794e45cb..22f77b1a4d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
@@ -68,16 +68,6 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
return txq->tx_rs_thresh;
 }
 
-static __rte_always_inline void
-tx_backlog_entry(struct ci_tx_entry_vec *txep,
-struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-   int i;
-
-   for (i = 0; i < (int)nb_pkts; ++i)
-   txep[i].mbuf = tx_pkts[i];
-}
-
 static inline void
 _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
 {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 2ccb399b64..f879f6fa9a 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -597,7 +597,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, n);
 
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -614,7 +614,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
txep = &txq->sw_ring_vec[tx_id];
}
 
-   tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index fa26365f06..915358e16b 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -720,7 +720,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
 
-   tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, n);
 
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -737,7 +737,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
txep = &txq->sw_ring_vec[tx_id];
}
 
-   tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
-- 
2.43.0



[PATCH v1 18/21] net/ice: use vector SW ring for all vector paths

2024-12-02 Thread Bruce Richardson
The AVX-512 code path used a smaller SW ring structure only containing
the mbuf pointer, but no other fields. The other fields are only used in
the scalar code path, so update all vector driver code paths to use the
smaller, faster structure.
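
For reference, a sketch of the two SW ring entry layouts involved; the
field names for the scalar entry are assumptions based on the existing
per-driver Tx entry structs, not taken from this patch:

#include <rte_mbuf.h>

/* scalar paths: extra bookkeeping, used only by the scalar Tx code */
struct ci_tx_entry {
	struct rte_mbuf *mbuf;
	uint16_t next_id; /* assumed: index of next descriptor */
	uint16_t last_id; /* assumed: index of packet's last descriptor */
};

/* vector paths: just the mbuf pointer, smaller and cache-friendlier */
struct ci_tx_entry_vec {
	struct rte_mbuf *mbuf;
};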

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h|  7 +++
 drivers/net/ice/ice_rxtx.c|  2 +-
 drivers/net/ice/ice_rxtx_vec_avx2.c   | 12 ++--
 drivers/net/ice/ice_rxtx_vec_avx512.c | 14 ++
 drivers/net/ice/ice_rxtx_vec_common.h |  6 --
 drivers/net/ice/ice_rxtx_vec_sse.c| 12 ++--
 6 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 279eb6ea67..d4054d7150 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -109,6 +109,13 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
rte_mbuf **tx_pkts, uint16_
txep[i].mbuf = tx_pkts[i];
 }
 
+static __rte_always_inline void
+ci_tx_backlog_entry_vec(struct ci_tx_entry_vec *txep, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
+{
+   for (uint16_t i = 0; i < nb_pkts; ++i)
+   txep[i].mbuf = tx_pkts[i];
+}
+
 #define IETH_VPMD_TX_MAX_FREE_BUF 64
 
 typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index ad0ddf6a88..77cb6688a7 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -825,7 +825,7 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
 
/* record what kind of descriptor cleanup we need on teardown */
txq->vector_tx = ad->tx_vec_allowed;
-   txq->vector_sw_ring = ad->tx_use_avx512;
+   txq->vector_sw_ring = txq->vector_tx;
 
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c 
b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 12ffa0fa9a..98bab322b4 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -858,7 +858,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 {
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
volatile struct ice_tx_desc *txdp;
-   struct ci_tx_entry *txep;
+   struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD;
@@ -867,7 +867,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
if (txq->nb_tx_free < txq->tx_free_thresh)
-   ice_tx_free_bufs_vec(txq);
+   ci_tx_free_bufs_vec(txq, ice_tx_desc_done, false);
 
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -875,13 +875,13 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
tx_id = txq->tx_tail;
txdp = &txq->ice_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
 
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   ci_tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, n);
 
ice_vtx(txdp, tx_pkts, n - 1, flags, offload);
tx_pkts += (n - 1);
@@ -896,10 +896,10 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
/* avoid reach the end of ring */
txdp = &txq->ice_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
}
 
-   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
 
ice_vtx(txdp, tx_pkts, nb_commit, flags, offload);
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c 
b/drivers/net/ice/ice_rxtx_vec_avx512.c
index f6ec593f96..481f784e34 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -924,16 +924,6 @@ ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf 
**pkt,
}
 }
 
-static __rte_always_inline void
-ice_tx_backlog_entry_avx512(struct ci_tx_entry_vec *txep,
-   struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-   int i;
-
-   for (i = 0; i < (int)nb_pkts; ++i)
-   txep[i].mbuf = tx_pkts[i];
-}
-
 static __rte_always_inline uint16_t
 ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool do_offload)
@@ -964,7 +954,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   ice_tx

[PATCH v1 19/21] net/i40e: use vector SW ring for all vector paths

2024-12-02 Thread Bruce Richardson
The AVX-512 code path used a smaller SW ring structure only containing
the mbuf pointer, but no other fields. The other fields are only used in
the scalar code path, so update all vector driver code paths (AVX2, SSE,
Neon, Altivec) to use the smaller, faster structure.

Signed-off-by: Bruce Richardson 
---
 drivers/net/i40e/i40e_rxtx.c |  8 +---
 drivers/net/i40e/i40e_rxtx_vec_altivec.c | 12 ++--
 drivers/net/i40e/i40e_rxtx_vec_avx2.c| 12 ++--
 drivers/net/i40e/i40e_rxtx_vec_avx512.c  | 14 ++
 drivers/net/i40e/i40e_rxtx_vec_common.h  |  6 --
 drivers/net/i40e/i40e_rxtx_vec_neon.c| 12 ++--
 drivers/net/i40e/i40e_rxtx_vec_sse.c | 12 ++--
 7 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 081d743e62..745c467912 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1891,7 +1891,7 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
tx_queue_id);
 
txq->vector_tx = ad->tx_vec_allowed;
-   txq->vector_sw_ring = ad->tx_use_avx512;
+   txq->vector_sw_ring = txq->vector_tx;
 
/*
 * tx_queue_id is queue id application refers to, while
@@ -3550,9 +3550,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
}
}
 
+   if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
+   ad->tx_vec_allowed = false;
+
if (ad->tx_simple_allowed) {
-   if (ad->tx_vec_allowed &&
-   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+   if (ad->tx_vec_allowed) {
 #ifdef RTE_ARCH_X86
if (ad->tx_use_avx512) {
 #ifdef CC_AVX512_SUPPORT
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c 
b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index 500bba2cef..b6900a3e15 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -553,14 +553,14 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 {
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
-   struct ci_tx_entry *txep;
+   struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
 
if (txq->nb_tx_free < txq->tx_free_thresh)
-   i40e_tx_free_bufs(txq);
+   ci_tx_free_bufs_vec(txq, i40e_tx_desc_done, false);
 
nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
nb_commit = nb_pkts;
@@ -569,13 +569,13 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
 
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   ci_tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, n);
 
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -589,10 +589,10 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
/* avoid reach the end of ring */
txdp = &txq->i40e_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
}
 
-   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c 
b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 29bef64287..2477573c01 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -745,13 +745,13 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 {
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
volatile struct i40e_tx_desc *txdp;
-   struct ci_tx_entry *txep;
+   struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
 
if (txq->nb_tx_free < txq->tx_free_thresh)
-   i40e_tx_free_bufs(txq);
+   ci_tx_free_bufs_vec(txq, i40e_tx_desc_done, false);
 
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -759,13 +759,13 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
tx_id = txq->tx_tail;
txdp = &txq->i40e_tx_ring[tx_id];
-   txep = &txq->sw_ring[tx_id];
+   txep = &txq->sw_ring_vec[tx_id];
 
txq->nb_tx_free = (uint16_t

[PATCH v1 01/21] net/_common_intel: add pkt reassembly fn for intel drivers

2024-12-02 Thread Bruce Richardson
The code for reassembling a single, multi-mbuf packet from multiple
buffers received from the NIC is duplicated across many drivers. Rather
than having multiple copies of this function, we can create an
"_common_intel" directory to hold such functions and consolidate
multiple functions down to a single one for easier maintenance.
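
A hypothetical call site, to show how the consolidated helper is used
from a vector Rx path (struct my_rxq and its field names are
illustrative assumptions, not taken from this patch):

static uint16_t
rx_reassemble_burst(struct my_rxq *rxq, struct rte_mbuf **rx_bufs,
		uint16_t nb_bufs, uint8_t *split_flags)
{
	/* split_flags[i] is nonzero while packet i continues in the
	 * next buffer; the first/last segment pointers carry state
	 * across bursts
	 */
	return ci_rx_reassemble_packets(rx_bufs, nb_bufs, split_flags,
					&rxq->pkt_first_seg,
					&rxq->pkt_last_seg,
					rxq->crc_len);
}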

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/rx.h| 81 +++
 drivers/net/i40e/i40e_rxtx_vec_altivec.c  |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c   |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h   | 64 +-
 drivers/net/i40e/i40e_rxtx_vec_neon.c |  4 +-
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  4 +-
 drivers/net/i40e/meson.build  |  2 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c |  8 +--
 drivers/net/iavf/iavf_rxtx_vec_avx512.c   |  8 +--
 drivers/net/iavf/iavf_rxtx_vec_common.h   | 65 +-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  |  8 +--
 drivers/net/iavf/meson.build  |  2 +-
 drivers/net/ice/ice_rxtx_vec_avx2.c   |  4 +-
 drivers/net/ice/ice_rxtx_vec_avx512.c |  8 +--
 drivers/net/ice/ice_rxtx_vec_common.h | 66 +-
 drivers/net/ice/ice_rxtx_vec_sse.c|  4 +-
 drivers/net/ice/meson.build   |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 63 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  4 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c|  4 +-
 drivers/net/ixgbe/meson.build |  2 +-
 22 files changed, 123 insertions(+), 292 deletions(-)
 create mode 100644 drivers/net/_common_intel/rx.h

diff --git a/drivers/net/_common_intel/rx.h b/drivers/net/_common_intel/rx.h
new file mode 100644
index 00..f0155ceb50
--- /dev/null
+++ b/drivers/net/_common_intel/rx.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_H_
+#define _COMMON_INTEL_RX_H_
+
+#include 
+#include 
+#include 
+
+#define CI_RX_BURST 32
+
+static inline uint16_t
+ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs,
+ uint16_t nb_bufs, uint8_t *split_flags,
+ struct rte_mbuf **pkt_first_seg,
+ struct rte_mbuf **pkt_last_seg,
+ const uint8_t crc_len)
+{
+   struct rte_mbuf *pkts[CI_RX_BURST] = {0}; /*finished pkts*/
+   struct rte_mbuf *start = *pkt_first_seg;
+   struct rte_mbuf *end = *pkt_last_seg;
+   unsigned int pkt_idx, buf_idx;
+
+   for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
+   if (end) {
+   /* processing a split packet */
+   end->next = rx_bufs[buf_idx];
+   rx_bufs[buf_idx]->data_len += crc_len;
+
+   start->nb_segs++;
+   start->pkt_len += rx_bufs[buf_idx]->data_len;
+   end = end->next;
+
+   if (!split_flags[buf_idx]) {
+   /* it's the last packet of the set */
+   start->hash = end->hash;
+   start->vlan_tci = end->vlan_tci;
+   start->ol_flags = end->ol_flags;
+   /* we need to strip crc for the whole packet */
+   start->pkt_len -= crc_len;
+   if (end->data_len > crc_len) {
+   end->data_len -= crc_len;
+   } else {
+   /* free up last mbuf */
+   struct rte_mbuf *secondlast = start;
+
+   start->nb_segs--;
+   while (secondlast->next != end)
+   secondlast = secondlast->next;
+   secondlast->data_len -= (crc_len - 
end->data_len);
+   secondlast->next = NULL;
+   rte_pktmbuf_free_seg(end);
+   }
+   pkts[pkt_idx++] = start;
+   start = NULL;
+   end = NULL;
+   }
+   } else {
+   /* not processing a split packet */
+   if (!split_flags[buf_idx]) {
+   /* not a split packet, save and skip */
+   pkts[pkt_idx++] = rx_bufs[buf_idx];
+   continue;
+   }
+   start = rx_bufs[buf_idx];
+   end = start;
+   rx_bufs[buf_idx]->data_len += crc_len;
+   rx_bufs[buf_idx]->pkt_len += crc_len;
+ 

[PATCH v1 03/21] net/_common_intel: add Tx mbuf ring replenish fn

2024-12-02 Thread Bruce Richardson
Move the short function used to place mbufs on the SW Tx ring to common
code to avoid duplication.

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h   |  7 +++
 drivers/net/i40e/i40e_rxtx_vec_altivec.c |  4 ++--
 drivers/net/i40e/i40e_rxtx_vec_avx2.c|  4 ++--
 drivers/net/i40e/i40e_rxtx_vec_common.h  | 10 --
 drivers/net/i40e/i40e_rxtx_vec_neon.c|  4 ++--
 drivers/net/i40e/i40e_rxtx_vec_sse.c |  4 ++--
 drivers/net/iavf/iavf_rxtx_vec_avx2.c|  4 ++--
 drivers/net/iavf/iavf_rxtx_vec_common.h  | 10 --
 drivers/net/iavf/iavf_rxtx_vec_sse.c |  4 ++--
 drivers/net/ice/ice_rxtx_vec_avx2.c  |  4 ++--
 drivers/net/ice/ice_rxtx_vec_common.h| 10 --
 drivers/net/ice/ice_rxtx_vec_sse.c   |  4 ++--
 12 files changed, 23 insertions(+), 46 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 384352b9db..5397007411 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -24,4 +24,11 @@ struct ci_tx_entry_vec {
struct rte_mbuf *mbuf; /* mbuf associated with TX desc, if any. */
 };
 
+static __rte_always_inline void
+ci_tx_backlog_entry(struct ci_tx_entry *txep, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
+{
+   for (uint16_t i = 0; i < nb_pkts; ++i)
+   txep[i].mbuf = tx_pkts[i];
+}
+
 #endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c 
b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index ca1038eaa6..80f07a3e10 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -575,7 +575,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry(txep, tx_pkts, n);
 
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -592,7 +592,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf 
**tx_pkts,
txep = &txq->sw_ring[tx_id];
}
 
-   tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c 
b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index e8441de759..b26bae4757 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -765,7 +765,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry(txep, tx_pkts, n);
 
vtx(txdp, tx_pkts, n - 1, flags);
tx_pkts += (n - 1);
@@ -783,7 +783,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct 
rte_mbuf **tx_pkts,
txep = &txq->sw_ring[tx_id];
}
 
-   tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h 
b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 619fb89110..325e99c1a4 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -84,16 +84,6 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
return txq->tx_rs_thresh;
 }
 
-static __rte_always_inline void
-tx_backlog_entry(struct ci_tx_entry *txep,
-struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-   int i;
-
-   for (i = 0; i < (int)nb_pkts; ++i)
-   txep[i].mbuf = tx_pkts[i];
-}
-
 static inline void
 _i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 {
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c 
b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 9b90a32e28..26bc345a0a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -702,7 +702,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
-   tx_backlog_entry(txep, tx_pkts, n);
+   ci_tx_backlog_entry(txep, tx_pkts, n);
 
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -719,7 +719,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
txep = &txq->sw_ring[tx_id];
}
 
-   tx_backlog_entry(txep, tx_pkts, nb_commit);
+   ci_tx_backlog_entry(txep, tx_pkts, nb_commit);
 
vtx(txdp, tx_pkts, nb_commit, flags);
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c 
b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index e1fa2ed543..ebc32b0d27 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@

[PATCH v1 13/21] net/iavf: use common Tx free fn for AVX-512

2024-12-02 Thread Bruce Richardson
Switch the iavf driver to use the common Tx free function. This requires
one additional parameter to that function, since iavf sometimes uses
context descriptors which means that we have double the descriptors per
SW ring slot.
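
As a worked example of what that parameter does (a sketch with
illustrative values, mirroring the shift visible in the diff below):

#include <stdbool.h>
#include <stdint.h>

/* with one context descriptor per data descriptor, each SW ring
 * entry covers two HW descriptors, so both the free count and the
 * start index are halved
 */
static inline uint32_t
first_entry_to_free(uint32_t tx_next_dd, uint32_t tx_rs_thresh,
		bool ctx_descs)
{
	uint32_t n = tx_rs_thresh >> ctx_descs;     /* e.g. 32 -> 16 */
	return (tx_next_dd >> ctx_descs) - (n - 1); /* e.g. 63 -> 16 */
}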

Signed-off-by: Bruce Richardson 
---
 drivers/net/_common_intel/tx.h  |   6 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c |   2 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 119 +---
 drivers/net/ice/ice_rxtx_vec_avx512.c   |   2 +-
 4 files changed, 7 insertions(+), 122 deletions(-)

diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
index 145501834a..21f4d71e50 100644
--- a/drivers/net/_common_intel/tx.h
+++ b/drivers/net/_common_intel/tx.h
@@ -179,7 +179,7 @@ ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn 
desc_done)
 }
 
 static __rte_always_inline int
-ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
+ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool 
ctx_descs)
 {
int nb_free = 0;
struct rte_mbuf *free[IETH_VPMD_TX_MAX_FREE_BUF];
@@ -189,13 +189,13 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, 
ci_desc_done_fn desc_done)
if (!desc_done(txq, txq->tx_next_dd))
return 0;
 
-   const uint32_t n = txq->tx_rs_thresh;
+   const uint32_t n = txq->tx_rs_thresh >> ctx_descs;
 
/* first buffer to free from S/W ring is at index
 * tx_next_dd - (tx_rs_thresh - 1)
 */
struct ci_tx_entry_vec *txep = txq->sw_ring_vec;
-   txep += txq->tx_next_dd - (n - 1);
+   txep += (txq->tx_next_dd >> ctx_descs) - (n - 1);
 
if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) 
{
struct rte_mempool *mp = txep[0].mbuf->pool;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c 
b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 9bb2a44231..c555c3491d 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -829,7 +829,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct 
rte_mbuf **tx_pkts,
uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
 
if (txq->nb_tx_free < txq->tx_free_thresh)
-   ci_tx_free_bufs_vec(txq, i40e_tx_desc_done);
+   ci_tx_free_bufs_vec(txq, i40e_tx_desc_done, false);
 
nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c 
b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 9cf7171524..8543490c70 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1844,121 +1844,6 @@ 
iavf_recv_scattered_pkts_vec_avx512_flex_rxd_offload(void *rx_queue,
true);
 }
 
-static __rte_always_inline int
-iavf_tx_free_bufs_avx512(struct ci_tx_queue *txq)
-{
-   struct ci_tx_entry_vec *txep;
-   uint32_t n;
-   uint32_t i;
-   int nb_free = 0;
-   struct rte_mbuf *m, *free[IAVF_VPMD_TX_MAX_FREE_BUF];
-
-   /* check DD bits on threshold descriptor */
-   if ((txq->iavf_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
-   rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
-   return 0;
-
-   n = txq->tx_rs_thresh >> txq->use_ctx;
-
-/* first buffer to free from S/W ring is at index
- * tx_next_dd - (tx_rs_thresh-1)
- */
-   txep = (void *)txq->sw_ring;
-   txep += (txq->tx_next_dd >> txq->use_ctx) - (n - 1);
-
-   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) 
{
-   struct rte_mempool *mp = txep[0].mbuf->pool;
-   struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
-   rte_lcore_id());
-   void **cache_objs;
-
-   if (!cache || cache->len == 0)
-   goto normal;
-
-   cache_objs = &cache->objs[cache->len];
-
-   if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
-   rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
-   goto done;
-   }
-
-   /* The cache follows the following algorithm
-*   1. Add the objects to the cache
-*   2. Anything greater than the cache min value (if it 
crosses the
-*   cache flush threshold) is flushed to the ring.
-*/
-   /* Add elements back into the cache */
-   uint32_t copied = 0;
-   /* n is multiple of 32 */
-   while (copied < n) {
-#ifdef RTE_ARCH_64
-   const __m512i a = _mm512_loadu_si512(&txep[copied]);
-   const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
-   const __m512i c = _mm

[PATCH v1 0/1] Rewrite devbind

2024-12-02 Thread Anatoly Burakov
It has been suggested [1] that a major cleanup/rewrite of devbind would be
beneficial in terms of long-term maintainability of the code. I was in a
coding mood over the weekend, so I went ahead and rewrote devbind.

Note that this is one giant patch, rather than a series of patches adjusting
existing code. Making it a patch series is possible, however the internal
code architecture diverges quite significantly from the original devbind
script due to its copious usage of string operations/pattern matching and
global variables, so it is unclear whether subdividing this patch would be
worth the effort.

The script has become slightly bigger - 1000 lines instead of 800 - however
I would argue that since most of that increase is infrastructure, comments,
and sacrificing code golf for code readability (such as expanding one-liners
into multiple lines), being able to read and reason about what happens in
the script is worth the added line count.

[1] 
https://patches.dpdk.org/project/dpdk/patch/c2bf00195c2d43833a831a9cc9346b4606d6ea2e.1723810613.git.anatoly.bura...@intel.com/

Anatoly Burakov (1):
  usertools/devbind: update coding style

 usertools/dpdk-devbind.py | 1736 +
 1 file changed, 968 insertions(+), 768 deletions(-)

-- 
2.43.5



[PATCH v1 1/1] usertools/devbind: update coding style

2024-12-02 Thread Anatoly Burakov
Devbind is one of the oldest tools in DPDK, and is written in a way that
uses a lot of string matching, no type safety, lots of global variables,
and has a few inconsistencies in the way it handles data (such as
differences between lspci calls and parsing in different circumstances).

This patch is a nigh complete rewrite of devbind, with full 100% feature
and command-line compatibility with the old version, albeit with a few
differences in formatting and error messages. All file handling code has
also been replaced with context managers.

What's different from old code:
- Full PEP-484 compliance
- Formatted with Ruff
- Much better structured code
- Clean and consistent control flow
- More comments
- Better error handling
- Fewer lspci calls
- Unified lspci parsing
- Using /sys/bus/pci/drivers as a source of truth about kernel modules
- Check for iproute2 package
- Deprecate --status-dev in favor of optional --status argument

Signed-off-by: Anatoly Burakov 
---
 usertools/dpdk-devbind.py | 1736 +
 1 file changed, 968 insertions(+), 768 deletions(-)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index f2a2a9a12f..fe4b60a541 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -1,705 +1,898 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-#
+# Copyright(c) 2010-2024 Intel Corporation
 
-import sys
+import argparse
+import glob
+import grp
 import os
+import pwd
 import subprocess
-import argparse
-import platform
-
-from glob import glob
-from os.path import exists, basename
-from os.path import join as path_join
-
-# The PCI base class for all devices
-network_class = {'Class': '02', 'Vendor': None, 'Device': None,
- 'SVendor': None, 'SDevice': None}
-acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
-  'SVendor': None, 'SDevice': None}
-ifpga_class = {'Class': '12', 'Vendor': '8086', 'Device': '0b30',
-   'SVendor': None, 'SDevice': None}
-encryption_class = {'Class': '10', 'Vendor': None, 'Device': None,
-'SVendor': None, 'SDevice': None}
-intel_processor_class = {'Class': '0b', 'Vendor': '8086', 'Device': None,
- 'SVendor': None, 'SDevice': None}
-cavium_sso = {'Class': '08', 'Vendor': '177d', 'Device': 'a04b,a04d',
-  'SVendor': None, 'SDevice': None}
-cavium_fpa = {'Class': '08', 'Vendor': '177d', 'Device': 'a053',
-  'SVendor': None, 'SDevice': None}
-cavium_pkx = {'Class': '08', 'Vendor': '177d', 'Device': 'a0dd,a049',
-  'SVendor': None, 'SDevice': None}
-cavium_tim = {'Class': '08', 'Vendor': '177d', 'Device': 'a051',
-  'SVendor': None, 'SDevice': None}
-cavium_zip = {'Class': '12', 'Vendor': '177d', 'Device': 'a037',
-  'SVendor': None, 'SDevice': None}
-avp_vnic = {'Class': '05', 'Vendor': '1af4', 'Device': '1110',
-'SVendor': None, 'SDevice': None}
-
-cnxk_bphy = {'Class': '08', 'Vendor': '177d', 'Device': 'a089',
- 'SVendor': None, 'SDevice': None}
-cnxk_bphy_cgx = {'Class': '08', 'Vendor': '177d', 'Device': 'a059,a060',
- 'SVendor': None, 'SDevice': None}
-cnxk_dma = {'Class': '08', 'Vendor': '177d', 'Device': 'a081',
-'SVendor': None, 'SDevice': None}
-cnxk_inl_dev = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f0,a0f1',
-'SVendor': None, 'SDevice': None}
-
-hisilicon_dma = {'Class': '08', 'Vendor': '19e5', 'Device': 'a122',
- 'SVendor': None, 'SDevice': None}
-odm_dma = {'Class': '08', 'Vendor': '177d', 'Device': 'a08c',
-   'SVendor': None, 'SDevice': None}
-
-intel_dlb = {'Class': '0b', 'Vendor': '8086', 'Device': '270b,2710,2714',
- 'SVendor': None, 'SDevice': None}
-intel_ioat_bdw = {'Class': '08', 'Vendor': '8086',
-  'Device': 
'6f20,6f21,6f22,6f23,6f24,6f25,6f26,6f27,6f2e,6f2f',
-  'SVendor': None, 'SDevice': None}
-intel_ioat_skx = {'Class': '08', 'Vendor': '8086', 'Device': '2021',
-  'SVendor': None, 'SDevice': None}
-intel_ioat_icx = {'Class': '08', 'Vendor': '8086', 'Device': '0b00',
-  'SVendor': None, 'SDevice': None}
-intel_idxd_spr = {'Class': '08', 'Vendor': '8086', 'Device': '0b25',
-  'SVendor': None, 'SDevice': None}
-intel_ntb_skx = {'Class': '06', 'Vendor': '8086', 'Device': '201c',
- 'SVendor': None, 'SDevice': None}
-intel_ntb_icx = {'Class': '06', 'Vendor': '8086', 'Device': '347e',
- 'SVendor': None, 'SDevice': None}
-
-cnxk_sso = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f9,a0fa',
-'SVendor': None, 'SDevice': None}
-cnxk_npa = {'Class': '08', 'Vendor': '177d', 'Device': 'a0fb,a0fc',
-'SVendor': None, 'SDevice': None}
-cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
-'SVendor': None, 'SDe

Re: [PATCH v1 11/21] net/_common_intel: add post-Tx buffer free function

2024-12-02 Thread Bruce Richardson
On Mon, Dec 02, 2024 at 01:59:37PM +0100, David Marchand wrote:
> On Mon, Dec 2, 2024 at 12:27 PM Bruce Richardson
>  wrote:
> >
> > The actions taken for post-Tx buffer free for the SSE and AVX drivers
> > for i40e, iavf and ice drivers are all common, so centralize those in
> > common/intel_eth driver.
> >
> > Signed-off-by: Bruce Richardson 
> > ---
> >  drivers/net/_common_intel/tx.h  | 71 
> >  drivers/net/i40e/i40e_rxtx_vec_common.h | 72 -
> >  drivers/net/iavf/iavf_rxtx_vec_common.h | 61 -
> >  drivers/net/ice/ice_rxtx_vec_common.h   | 61 -
> >  4 files changed, 98 insertions(+), 167 deletions(-)
> >
> > diff --git a/drivers/net/_common_intel/tx.h b/drivers/net/_common_intel/tx.h
> > index c372d2838b..a930309c05 100644
> > --- a/drivers/net/_common_intel/tx.h
> > +++ b/drivers/net/_common_intel/tx.h
> > @@ -7,6 +7,7 @@
> >
> >  #include 
> >  #include 
> > +#include 
> >
> >  /* forward declaration of the common intel (ci) queue structure */
> >  struct ci_tx_queue;
> > @@ -107,4 +108,74 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
> > rte_mbuf **tx_pkts, uint16_
> > txep[i].mbuf = tx_pkts[i];
> >  }
> >
> > +#define IETH_VPMD_TX_MAX_FREE_BUF 64
> > +
> > +typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
> > +
> > +static __rte_always_inline int
> > +ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
> > +{
> > +   struct ci_tx_entry *txep;
> > +   uint32_t n;
> > +   uint32_t i;
> > +   int nb_free = 0;
> > +   struct rte_mbuf *m, *free[IETH_VPMD_TX_MAX_FREE_BUF];
> > +
> > +   /* check DD bits on threshold descriptor */
> > +   if (!desc_done(txq, txq->tx_next_dd))
> > +   return 0;
> > +
> > +   n = txq->tx_rs_thresh;
> > +
> > +/* first buffer to free from S/W ring is at index
> > + * tx_next_dd - (tx_rs_thresh-1)
> > + */
> > +   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > +
> > +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > +   for (i = 0; i < n; i++) {
> > +   free[i] = txep[i].mbuf;
> > +   /* no need to reset txep[i].mbuf in vector path */
> > +   }
> > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
> > +   goto done;
> > +   }
> > +
> > +   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
> > +   if (likely(m != NULL)) {
> > +   free[0] = m;
> > +   nb_free = 1;
> > +   for (i = 1; i < n; i++) {
> > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +   if (likely(m != NULL)) {
> > +   if (likely(m->pool == free[0]->pool)) {
> > +   free[nb_free++] = m;
> > +   } else {
> > +   rte_mempool_put_bulk(free[0]->pool,
> > +(void *)free,
> > +nb_free);
> > +   free[0] = m;
> > +   nb_free = 1;
> > +   }
> > +   }
> > +   }
> > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
> > +   } else {
> > +   for (i = 1; i < n; i++) {
> > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +   if (m != NULL)
> > +   rte_mempool_put(m->pool, m);
> > +   }
> > +   }
> 
> Is it possible to take an extra step and convert to rte_pktmbuf_free_bulk?
> 
Right now that's not possible without some more severe refactoring - and
even then I'm not convinced that it should be done. The code here is
working off the buffers in the shadow ring directly, where they should be
flattened out to avoid having mbuf chains. Therefore, we are freeing
segment by segment as each buffer has been transmitted.

/Bruce
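
To make the distinction concrete, a sketch of the two freeing models
(assuming rte_pktmbuf_free_bulk() walks each chain via m->next, while
the shadow ring stores flattened single segments):

#include <rte_mbuf.h>

/* chain-aware: frees every segment reachable through m->next */
static void
free_as_chains(struct rte_mbuf **pkts, unsigned int n)
{
	rte_pktmbuf_free_bulk(pkts, n);
}

/* per-segment: each entry is already a single segment; prefree
 * returns NULL when the mbuf must not go back to its pool yet
 */
static void
free_as_segments(struct rte_mbuf **segs, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++) {
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(segs[i]);
		if (m != NULL)
			rte_mempool_put(m->pool, m);
	}
}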


Re: [RFC 1/3] eal: add enhanced lock annotations

2024-12-02 Thread Stephen Hemminger
On Mon,  2 Dec 2024 13:53:14 +0100
David Marchand  wrote:

> diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
> index d23352d300..ac490e7631 100644
> --- a/doc/api/doxy-api.conf.in
> +++ b/doc/api/doxy-api.conf.in
> @@ -103,6 +103,18 @@ PREDEFINED  = __DOXYGEN__ \
>__rte_shared_trylock_function(x)= \
>__rte_assert_shared_lock(x)= \
>__rte_unlock_function(x)= \
> +  __rte_capability(x)= \
> +  __rte_requires_capability(x)= \
> +  __rte_acquire_capability(x)= \
> +  __rte_try_acquire_capability(x)= \
> +  __rte_release_capability(x)= \
> +  __rte_assert_capability(x)= \
> +  __rte_requires_shared_capability(x)= \
> +  __rte_acquire_shared_capability(x)= \
> +  __rte_try_acquire_shared_capability(x)= \
> +  __rte_release_shared_capability(x)= \
> +  __rte_assert_shared_capability(x)= \
> +  __rte_exclude_capability(x)= \
>__attribute__(x)=

I would suggest shortened names:
__rte_acquires(x)
__rte_releases(x)
__rte_must_hold(x)

Based on the original source of all these lock annotations which is sparse.
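
A quick sketch of how those shortened names could map onto the clang
attributes quoted above (illustrative only, not an existing DPDK API):

#define __rte_acquires(...) \
	__attribute__((acquire_capability(__VA_ARGS__)))
#define __rte_releases(...) \
	__attribute__((release_capability(__VA_ARGS__)))
#define __rte_must_hold(...) \
	__attribute__((requires_capability(__VA_ARGS__)))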


Re: [PATCH v1 0/1] Rewrite devbind

2024-12-02 Thread Bruce Richardson
On Mon, Dec 02, 2024 at 03:09:33PM +, Anatoly Burakov wrote:
> It has been suggested [1] that a major cleanup/rewrite of devbind would be
> beneficial in terms of long-term maintainability of the code. I was in a
> coding mood over the weekend, so I went ahead and rewrote devbind.
> 
> Note that this is one giant patch, rather than a series of patches adjusting
> existing code. Making it a patch series is possible, however the internal
> code architecture diverges quite significantly from the original devbind
> script due to its copious usage of string operations/pattern matching and
> global variables, so it is unclear whether subdividing this patch would be
> worth the effort.
> 
One suggestion here which might help reviewing. Since it is essentially a
rewrite, is it worth making this a two-patch set, where:

Patch 1: introduces a new script called e.g. dpdk-devbind-new.py, which
 contains just the rewrite without any of old code. This then can be
 reviewed in isolation
Patch 2: moves dpdk-devbind-new.py to overwrite dpdk-devbind.py

WDYT?

Regards,
/Bruce


Re: [PATCH v4 1/1] usertools/devbind: allow changing UID/GID for VFIO

2024-12-02 Thread Burakov, Anatoly

On 12/2/2024 10:31 AM, Anatoly Burakov wrote:

Currently, when binding a device to VFIO, the UID/GID for the device will
always stay as system default (`root`). Yet, when running DPDK as non-root
user, one has to change the UID/GID of the device to match the user's
UID/GID to use the device.

This patch adds an option to `dpdk-devbind.py` to change the UID/GID of
the device when binding it to VFIO.

Signed-off-by: Anatoly Burakov 
---


Missed

Reviewed-by: Robin Jarry 

--
Thanks,
Anatoly


[PATCH v1 1/1] usertools/devbind: add documentation for no-IOMMU mode

2024-12-02 Thread Anatoly Burakov
Support for `--noiommu-mode` flag was added, but documentation for it was
not. Add documentation for the flag.

Signed-off-by: Anatoly Burakov 
---
 doc/guides/tools/devbind.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/guides/tools/devbind.rst b/doc/guides/tools/devbind.rst
index 841615570f..e010d4b7df 100644
--- a/doc/guides/tools/devbind.rst
+++ b/doc/guides/tools/devbind.rst
@@ -62,6 +62,11 @@ OPTIONS
   Use this flag to change ownership to the specified user and group, so 
that
   devices bound to VFIO would be usable by unprivileged users.
 
+* ``--noiommu-mode``
+
+  When using vfio-pci driver on a system with no IOMMU, this flag should 
be used to
+  enable unsafe no-IOMMU mode for vfio-pci driver.
+
 
 .. warning::
 
-- 
2.43.5



Re: [PATCH v1 11/21] net/_common_intel: add post-Tx buffer free function

2024-12-02 Thread David Marchand
On Mon, Dec 2, 2024 at 2:24 PM Bruce Richardson
 wrote:
>
> On Mon, Dec 02, 2024 at 01:59:37PM +0100, David Marchand wrote:
> > On Mon, Dec 2, 2024 at 12:27 PM Bruce Richardson
> >  wrote:
> > >
> > > The actions taken for post-Tx buffer free for the SSE and AVX drivers
> > > for i40e, iavf and ice drivers are all common, so centralize those in
> > > common/intel_eth driver.
> > >
> > > Signed-off-by: Bruce Richardson 
> > > ---
> > >  drivers/net/_common_intel/tx.h  | 71 
> > >  drivers/net/i40e/i40e_rxtx_vec_common.h | 72 -
> > >  drivers/net/iavf/iavf_rxtx_vec_common.h | 61 -
> > >  drivers/net/ice/ice_rxtx_vec_common.h   | 61 -
> > >  4 files changed, 98 insertions(+), 167 deletions(-)
> > >
> > > diff --git a/drivers/net/_common_intel/tx.h 
> > > b/drivers/net/_common_intel/tx.h
> > > index c372d2838b..a930309c05 100644
> > > --- a/drivers/net/_common_intel/tx.h
> > > +++ b/drivers/net/_common_intel/tx.h
> > > @@ -7,6 +7,7 @@
> > >
> > >  #include 
> > >  #include 
> > > +#include 
> > >
> > >  /* forward declaration of the common intel (ci) queue structure */
> > >  struct ci_tx_queue;
> > > @@ -107,4 +108,74 @@ ci_tx_backlog_entry(struct ci_tx_entry *txep, struct 
> > > rte_mbuf **tx_pkts, uint16_
> > > txep[i].mbuf = tx_pkts[i];
> > >  }
> > >
> > > +#define IETH_VPMD_TX_MAX_FREE_BUF 64
> > > +
> > > +typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
> > > +
> > > +static __rte_always_inline int
> > > +ci_tx_free_bufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done)
> > > +{
> > > +   struct ci_tx_entry *txep;
> > > +   uint32_t n;
> > > +   uint32_t i;
> > > +   int nb_free = 0;
> > > +   struct rte_mbuf *m, *free[IETH_VPMD_TX_MAX_FREE_BUF];
> > > +
> > > +   /* check DD bits on threshold descriptor */
> > > +   if (!desc_done(txq, txq->tx_next_dd))
> > > +   return 0;
> > > +
> > > +   n = txq->tx_rs_thresh;
> > > +
> > > +/* first buffer to free from S/W ring is at index
> > > + * tx_next_dd - (tx_rs_thresh-1)
> > > + */
> > > +   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > +
> > > +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > +   for (i = 0; i < n; i++) {
> > > +   free[i] = txep[i].mbuf;
> > > +   /* no need to reset txep[i].mbuf in vector path */
> > > +   }
> > > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
> > > +   goto done;
> > > +   }
> > > +
> > > +   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
> > > +   if (likely(m != NULL)) {
> > > +   free[0] = m;
> > > +   nb_free = 1;
> > > +   for (i = 1; i < n; i++) {
> > > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > +   if (likely(m != NULL)) {
> > > +   if (likely(m->pool == free[0]->pool)) {
> > > +   free[nb_free++] = m;
> > > +   } else {
> > > +   
> > > rte_mempool_put_bulk(free[0]->pool,
> > > +(void *)free,
> > > +nb_free);
> > > +   free[0] = m;
> > > +   nb_free = 1;
> > > +   }
> > > +   }
> > > +   }
> > > +   rte_mempool_put_bulk(free[0]->pool, (void **)free, 
> > > nb_free);
> > > +   } else {
> > > +   for (i = 1; i < n; i++) {
> > > +   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > +   if (m != NULL)
> > > +   rte_mempool_put(m->pool, m);
> > > +   }
> > > +   }
> >
> > Is it possible to take an extra step and convert to rte_pktmbuf_free_bulk?
> >
> Right now that's not possible without some more severe refactoring - and
> even then I'm not convinced that it should be done. The code here is
> working off the buffers in the shadow ring directly, where they should be
> flattened out to avoid having mbuf chains. Therefore, we are freeing
> segment by segment as each buffer has been transmitted.

Nevermind, at least, this series removes many copies of this loop.
Thanks Bruce.


-- 
David Marchand



RE: [PATCH v3] net/mlx5: fix RSS hash for non-RSS CQE zipping

2024-12-02 Thread Dariusz Sosnowski



> -Original Message-
> From: Alexander Kozyrev 
> Sent: Saturday, November 30, 2024 01:39
> To: dev@dpdk.org
> Cc: sta...@dpdk.org; Raslan Darawsheh ; Slava Ovsiienko
> ; Matan Azrad ; Dariusz
> Sosnowski ; Bing Zhao ;
> Suanming Mou 
> Subject: [PATCH v3] net/mlx5: fix RSS hash for non-RSS CQE zipping
> 
> Take the RSS hash value from the title packet before it gets overwritten by 
> the
> decompression routine.
> Set the RSS hash flag in the packet mbuf if RSS is enabled in case of non-RSS 
> CQE
> zipping format.
> 
> Fixes: 54c2d46 ("net/mlx5: support flow tag and packet header miniCQEs")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev 

Acked-by: Dariusz Sosnowski 

Best regards,
Dariusz Sosnowski


Re: [PATCH v1 0/1] Rewrite devbind

2024-12-02 Thread Stephen Hemminger
On Mon,  2 Dec 2024 15:09:33 +
Anatoly Burakov  wrote:

> It has been suggested [1] that a major cleanup/rewrite of devbind would be
> beneficial in terms of long-term maintainability of the code. I was in a
> coding mood over the weekend, so I went ahead and rewrote devbind.
> 
> Note that this is one giant patch, rather than a series of patches adjusting
> existing code. Making it a patch series is possible, however the internal
> code architecture diverges quite significantly from the original devbind
> script due to its copious usage of string operations/pattern matching and
> global variables, so it is unclear whether subdividing this patch would be
> worth the effort.
> 
> The script has become slightly bigger - 1000 lines instead of 800 - however
> I would argue that since most of that increase is infrastructure, comments,
> and sacrificing code golf for code readability (such as expanding one-liners
> into multiple lines), being able to read and reason about what happens in
> the script is worth the added line count.
> 
> [1] 
> https://patches.dpdk.org/project/dpdk/patch/c2bf00195c2d43833a831a9cc9346b4606d6ea2e.1723810613.git.anatoly.bura...@intel.com/
> 
> Anatoly Burakov (1):
>   usertools/devbind: update coding style
> 
>  usertools/dpdk-devbind.py | 1736 +
>  1 file changed, 968 insertions(+), 768 deletions(-)
> 

Looks good, but needs to be rebased to current 24.11


Re: [v1 01/12] net/enetc: Add initial ENETC4 PMD driver support

2024-12-02 Thread Stephen Hemminger
On Fri, 18 Oct 2024 12:56:33 +0530
vanshika.shu...@nxp.com wrote:

> +#. **Linux Kernel**
> +
> +   It can be obtained from `NXP's Github hosting 
> `_.
> +

If the driver only exists for DPDK, shouldn't it be in the DPDK kmods
repo?

I looked at the GitHub version and the driver looks like it is not very
polished. Mostly things that would be flagged during a code review:
lots of printk's, as if the developer was not sure;
lots of casts, which hide potential bugs;
an unsafe copy from user space;
a module license and SPDX license mismatch.




Re: [PATCH v2 1/3] net: add thread-safe crc api

2024-12-02 Thread Stephen Hemminger
On Tue,  1 Oct 2024 19:11:48 +0100
Arkadiusz Kusztal  wrote:

> The current net CRC API is not thread-safe, this patch
> solves this by adding another, thread-safe API functions.

Couldn't the old API be made threadsafe with TLS?
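
A minimal sketch of that idea, assuming the per-thread state is just
the resolved CRC function pointer (illustrative, not the existing
rte_net_crc API):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t (*crc32_fn)(const uint8_t *data, size_t len);

/* plain bitwise CRC-32 (reflected 0xedb88320 polynomial) */
static uint32_t
crc32_bitwise(const uint8_t *data, size_t len)
{
	uint32_t crc = 0xffffffffu;

	for (size_t i = 0; i < len; i++) {
		crc ^= data[i];
		for (int b = 0; b < 8; b++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320u : 0);
	}
	return ~crc;
}

/* each thread resolves its own function once; no shared global */
static _Thread_local crc32_fn tls_crc32;

static uint32_t
net_crc32(const uint8_t *data, size_t len)
{
	if (tls_crc32 == NULL)
		tls_crc32 = crc32_bitwise; /* or a SIMD variant per CPU */
	return tls_crc32(data, len);
}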

> This API is also safe to use across multiple processes,
> yet with limitations on max-simd-bitwidth, which will be checked only by
> the process that created the CRC context; all other processes will use
> the same CRC function when used with the same CRC context.
> It is an undefined behavior when process binaries are compiled
> with different SIMD capabilities when the same CRC context is used.
> 
> Signed-off-by: Arkadiusz Kusztal 

The API/ABI can't change for 25.03, do you want to support both?
Or wait until 25.11?


Re: lib/eal/arm/include/rte_vect.h fails to compile with clang14 for 32bit ARM

2024-12-02 Thread Ruifeng Wang
+Arm folks.

From: Roger Melton (rmelton) 
Date: Tuesday, December 3, 2024 at 3:39 AM
To: dev@dpdk.org , Ruifeng Wang 
Subject: lib/eal/arm/include/rte_vect.h fails to compile with clang14 for 32bit 
ARM

Hey folks,
We are building DPDK with clang14 for a 32bit armv8-a based CPU and ran into a 
compile error with the following from lib/eal/arm/include/rte_vect.h:



#if (defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
    (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7))
/* NEON intrinsic vcopyq_laneq_u32() is not supported in ARMv7-A(AArch32)
 * On AArch64, this intrinsic is supported since GCC version 7.
 */
static inline uint32x4_t
vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
		 uint32x4_t b, const int lane_b)
{
	return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
}
#endif

clang14 compile fails as follows:

In file included from 
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/arm/include/rte_vect.h:80:24:
 error: argument to '__builtin_neon_vgetq_lane_i32' must be a constant integer
return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
^ ~~
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:7697:22:
 note: expanded from macro 'vgetq_lane_u32'
__ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \
^ 
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:24148:19:
 note: expanded from macro 'vsetq_lane_u32'
uint32_t __s0 = __p0; \
^~~~
In file included from 
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
../../../../../../cisco-dpdk-upstream-arm-clang-fixes.git/lib/eal/arm/include/rte_vect.h:80:9:
 error: argument to '__builtin_neon_vsetq_lane_i32' must be a constant integer
return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
^ ~~
/auto/binos-tools/llvm14/llvm-14.0-p24/lib/clang/14.0.5/include/arm_neon.h:24150:24:
 note: expanded from macro 'vsetq_lane_u32'
__ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, 
__p2); \
^ 
2 errors generated.



clang14 does appear to support the vcopyq_laneq_u32() intrinsic, so we want to 
skip the conditional implementation.

Two approaches I have tested to resolve the error are:

1) skip if building with clang:

#if !defined(__clang__) && ((defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
    (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7)))


2) skip if not building for ARMv7:


#if (defined(RTE_ARCH_ARMv7) && defined(RTE_ARCH_32)) || \
(defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION < 7))


Both address our immediate problem, but may not be a appropriate for all cases.

Can anyone suggest the proper way to address this? I'll be submitting a patch 
once I have a solution that is acceptable to the community.
Regards,
Roger


Question on TX offload

2024-12-02 Thread Jie Hai

Hi, all maintainers and developers,

Since the hns3 NIC supports TSO for UDP (not UDP fragmentation)
and for IPv6 headers with extension headers, I tested it with testpmd,
but the offload could not be performed: the TCP segmentation flag is
set only for packets whose next header after L3 is TCP.


I checked some information on TSO and have doubts about the current
support for both cases.


The commit
7238e63bce52 ("ethdev: add support for device offload capabilities")
introduces TX offloads including DEV_TX_OFFLOAD_UDP_TSO and
DEV_TX_OFFLOAD_TCP_TSO. The former evolved into UDP fragmentation
offload and the latter into TCP segmentation offload:
[1].RTE_ETH_TX_OFFLOAD_UDP_TSO -- RTE_MBUF_F_TX_UDP_SEG
UDP Fragmentation Offload
[2].RTE_ETH_TX_OFFLOAD_TCP_TSO -- RTE_MBUF_F_TX_TCP_SEG
TCP segmentation offload, related to TCP packets.

However, the commit message describes the TCP segmentation offload as
transmit segmentation offload. Does the TCP segmentation offload
cover UDP packets as well?

Many drivers support UDP segmentation offload in a way similar to
TCP segmentation offload, for example atlantic, bnxt, cxgb4, hns3,
i40e, iavf, ice, idpf, igb, ixgbe, mlx5, etc.

Please refer to Linux kernel include/linux/netdev_features.h
NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */


My questions are:

[1]. Does the TCP segmentation offload (RTE_ETH_TX_OFFLOAD_TCP_TSO)
cover UDP packets?

If yes, then the testpmd app should set the flag for UDP packets.
If no, a new offload should be added.
Considering that not all drivers may support both TCP and UDP
segmentation, how about distinguishing the three capabilities and
flags as below (a usage sketch follows the list)?

[1].RTE_ETH_TX_OFFLOAD_TCP_TSO -- RTE_MBUF_F_TX_TCP_SEG
TCP segmentation offload, related to TCP as L4.
[2].RTE_ETH_TX_OFFLOAD_UDP_TSO -- RTE_MBUF_F_TX_UDP_SEG
UDP segmentation offload, related to UDP as L4.
[3].RTE_ETH_TX_OFFLOAD_UFO -- RTE_MBUF_F_TX_UFO_SEG
UDP Fragmentation Offload
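
For reference, a sketch of how an application requests TSO on an mbuf
with today's flags (RTE_MBUF_F_TX_UFO_SEG above is part of the
proposal and hypothetical; the flags and fields below exist):

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

static void
request_tcp_tso(struct rte_mbuf *m)
{
	m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG |
		       RTE_MBUF_F_TX_IPV4 |
		       RTE_MBUF_F_TX_IP_CKSUM;
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->l4_len = sizeof(struct rte_tcp_hdr);
	m->tso_segsz = 1400; /* payload bytes per output segment */
}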

[2]. Can TSO be performed on packets with IPv6 extension headers?
If yes, the documentation and the testpmd app should be updated.
If no, maybe the documentation should be more detailed.

Thanks,
Jie Hai


[PATCH] version: 25.03-rc0

2024-12-02 Thread David Marchand
Start a new release cycle with empty release notes.
Bump version and ABI minor.
Bump libabigail from 2.4 to 2.6 and enable ABI checks.

Signed-off-by: David Marchand 
---
 .github/workflows/build.yml|   8 +-
 ABI_VERSION|   2 +-
 VERSION|   2 +-
 doc/guides/rel_notes/index.rst |   1 +
 doc/guides/rel_notes/release_25_03.rst | 138 +
 5 files changed, 145 insertions(+), 6 deletions(-)
 create mode 100644 doc/guides/rel_notes/release_25_03.rst

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d99700b6e9..dcafb4a8f5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ defaults:
 env:
   REF_GIT_BRANCH: main
   REF_GIT_REPO: https://dpdk.org/git/dpdk
-  REF_GIT_TAG: none
+  REF_GIT_TAG: v24.11
 
 jobs:
   checkpatch:
@@ -46,7 +46,7 @@ jobs:
   BUILD_EXAMPLES: ${{ contains(matrix.config.checks, 'examples') }}
   CC: ccache ${{ matrix.config.compiler }}
   DEF_LIB: ${{ matrix.config.library }}
-  LIBABIGAIL_VERSION: libabigail-2.4
+  LIBABIGAIL_VERSION: libabigail-2.6
   MINGW: ${{ matrix.config.cross == 'mingw' }}
   MINI: ${{ matrix.config.mini != '' }}
   PPC64LE: ${{ matrix.config.cross == 'ppc64le' }}
@@ -69,7 +69,7 @@ jobs:
 checks: stdatomic
   - os: ubuntu-22.04
 compiler: gcc
-checks: debug+doc+examples+tests
+checks: abi+debug+doc+examples+tests
   - os: ubuntu-22.04
 compiler: clang
 checks: asan+doc+tests
@@ -133,7 +133,7 @@ jobs:
 python3-pyelftools python3-setuptools python3-wheel zlib1g-dev
 - name: Install libabigail build dependencies if no cache is available
   if: env.ABI_CHECKS == 'true' && steps.libabigail-cache.outputs.cache-hit 
!= 'true'
-  run: sudo apt install -y autoconf automake libdw-dev libtool libxml2-dev
+  run: sudo apt install -y autoconf automake libdw-dev libtool libxml2-dev 
libxxhash-dev
 - name: Install i386 cross compiling packages
   if: env.BUILD_32BIT == 'true'
   run: sudo apt install -y gcc-multilib g++-multilib libnuma-dev:i386
diff --git a/ABI_VERSION b/ABI_VERSION
index be8e64f5a3..8b9bee5b58 100644
--- a/ABI_VERSION
+++ b/ABI_VERSION
@@ -1 +1 @@
-25.0
+25.1
diff --git a/VERSION b/VERSION
index 0a492611a0..04a8405dad 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-24.11.0
+25.03.0-rc0
diff --git a/doc/guides/rel_notes/index.rst b/doc/guides/rel_notes/index.rst
index 74ddae3e81..fc0309113e 100644
--- a/doc/guides/rel_notes/index.rst
+++ b/doc/guides/rel_notes/index.rst
@@ -8,6 +8,7 @@ Release Notes
 :maxdepth: 1
 :numbered:
 
+release_25_03
 release_24_11
 release_24_07
 release_24_03
diff --git a/doc/guides/rel_notes/release_25_03.rst 
b/doc/guides/rel_notes/release_25_03.rst
new file mode 100644
index 00..426dfcd982
--- /dev/null
+++ b/doc/guides/rel_notes/release_25_03.rst
@@ -0,0 +1,138 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright 2024 The DPDK contributors
+
+.. include:: 
+
+DPDK Release 25.03
+==
+
+.. **Read this first.**
+
+   The text in the sections below explains how to update the release notes.
+
+   Use proper spelling, capitalization and punctuation in all sections.
+
+   Variable and config names should be quoted as fixed width text:
+   ``LIKE_THIS``.
+
+   Build the docs and view the output file to ensure the changes are correct::
+
+  ninja -C build doc
+  xdg-open build/doc/guides/html/rel_notes/release_25_03.html
+
+
+New Features
+
+
+.. This section should contain new features added in this release.
+   Sample format:
+
+   * **Add a title in the past tense with a full stop.**
+
+ Add a short 1-2 sentence description in the past tense.
+ The description should be enough to allow someone scanning
+ the release notes to understand the new feature.
+
+ If the feature adds a lot of sub-features you can use a bullet list
+ like this:
+
+ * Added feature foo to do something.
+ * Enhanced feature bar to do something else.
+
+ Refer to the previous release notes for examples.
+
+ Suggested order in release notes items:
+ * Core libs (EAL, mempool, ring, mbuf, buses)
+ * Device abstraction libs and PMDs (ordered alphabetically by vendor name)
+   - ethdev (lib, PMDs)
+   - cryptodev (lib, PMDs)
+   - eventdev (lib, PMDs)
+   - etc
+ * Other libs
+ * Apps, Examples, Tools (if significant)
+
+ This section is a comment. Do not overwrite or remove it.
+ Also, make sure to start the actual text at the margin.
+ ===
+
+
+Removed Items
+-
+
+.. This section should contain removed items in this release. Sample format:
+
+   * Add a short 1-2 sentence description of the removed item
+ in the past tense.
+
+   This