[PATCH v2 1/2] test/dma: update the sg test to verify wrap around case

2024-04-19 Thread Vidya Sagar Velumuri
Run the sg test in a loop to verify the wrap-around case.
The total number of commands submitted is made larger than the number of
descriptors allocated, to exercise this scenario.

Signed-off-by: Vidya Sagar Velumuri 
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 143e1bcd68..7462e90831 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -393,34 +393,26 @@ test_stop_start(int16_t dev_id, uint16_t vchan)
 }
 
 static int
-test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
+test_enqueue_sg(int16_t dev_id, uint16_t vchan, unsigned int n_sge, unsigned int test_len)
 {
-   unsigned int src_len, dst_len, n_sge, len, i, j, k;
char orig_src[COPY_LEN], orig_dst[COPY_LEN];
-   struct rte_dma_info info = { 0 };
+   unsigned int src_len, dst_len, i, j, k;
enum rte_dma_status_code status;
uint16_t id, n_src, n_dst;
 
-   if (rte_dma_info_get(dev_id, &info) < 0)
-   ERR_RETURN("Failed to get dev info");
-
-   if (info.max_sges < 2)
-   ERR_RETURN("Test needs minimum 2 SG pointers");
-
-   n_sge = info.max_sges;
-
for (n_src = 1; n_src <= n_sge; n_src++) {
for (n_dst = 1; n_dst <= n_sge; n_dst++) {
/* Normalize SG buffer lengths */
-   len = COPY_LEN;
-   len -= (len % (n_src * n_dst));
-   dst_len = len / n_dst;
-   src_len = len / n_src;
-
+   unsigned int len = test_len - (test_len % (n_src * n_dst));
struct rte_dma_sge sg_src[n_sge], sg_dst[n_sge];
struct rte_mbuf *src[n_sge], *dst[n_sge];
char *src_data[n_sge], *dst_data[n_sge];
 
+   dst_len = len / n_dst;
+   src_len = len / n_src;
+   if (dst_len == 0 || src_len == 0)
+   continue;
+
for (i = 0 ; i < len; i++)
orig_src[i] = rte_rand() & 0xFF;
 
@@ -511,6 +503,27 @@ test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
return 0;
 }
 
+static int
+test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
+{
+   struct rte_dma_info info = { 0 };
+   unsigned int n_sge, len;
+   int loop_count = 0;
+
+   if (rte_dma_info_get(dev_id, &info) < 0)
+   ERR_RETURN("Failed to get dev info");
+
+   n_sge = RTE_MIN(info.max_sges, TEST_SG_MAX);
+   len = COPY_LEN;
+
+   do {
+   test_enqueue_sg(dev_id, vchan, n_sge, len);
+   loop_count++;
+   } while (loop_count * n_sge * n_sge < TEST_RINGSIZE * 3);
+
+   return 0;
+}
+
 /* Failure handling test cases - global macros and variables for those tests*/
 #define COMP_BURST_SZ  16
 #define OPT_FENCE(idx) ((fence && idx == 8) ? RTE_DMA_OP_FLAG_FENCE : 0)
diff --git a/app/test/test_dmadev_api.c b/app/test/test_dmadev_api.c
index d40c05cfbf..6a07ed593b 100644
--- a/app/test/test_dmadev_api.c
+++ b/app/test/test_dmadev_api.c
@@ -16,7 +16,6 @@ extern int test_dma_api(uint16_t dev_id);
 
 #define TEST_MEMCPY_SIZE   1024
 #define TEST_WAIT_US_VAL   5
-#define TEST_SG_MAX   64
 
 static int16_t test_dev_id;
 static int16_t invalid_dev_id;
diff --git a/app/test/test_dmadev_api.h b/app/test/test_dmadev_api.h
index 33fbc5bd41..a03f7acd4f 100644
--- a/app/test/test_dmadev_api.h
+++ b/app/test/test_dmadev_api.h
@@ -2,4 +2,6 @@
  * Copyright(c) 2021 HiSilicon Limited
  */
 
+#define TEST_SG_MAX   64
+
 int test_dma_api(uint16_t dev_id);
-- 
2.25.1



[PATCH v2 2/2] test/dma: add functions to verify zero and one fill

2024-04-19 Thread Vidya Sagar Velumuri
Add test cases to verify zero fill and one fill

Signed-off-by: Vidya Sagar Velumuri 
diff --git a/app/test/test.h b/app/test/test.h
index 15e23d297f..0ca6519f6e 100644
--- a/app/test/test.h
+++ b/app/test/test.h
@@ -27,6 +27,10 @@
 
 #include 
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 #define TEST_ASSERT RTE_TEST_ASSERT
 
 #define TEST_ASSERT_EQUAL RTE_TEST_ASSERT_EQUAL
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 7462e90831..ec896a4905 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -869,42 +869,51 @@ test_completion_handling(int16_t dev_id, uint16_t vchan)
 static int
 test_enqueue_fill(int16_t dev_id, uint16_t vchan)
 {
+   uint64_t pattern[3] = {0x0, 0xfedcba9876543210, 0xffffffffffffffff};
const unsigned int lengths[] = {8, 64, 1024, 50, 100, 89};
+   unsigned int i, j, k;
struct rte_mbuf *dst;
char *dst_data;
-   uint64_t pattern = 0xfedcba9876543210;
-   unsigned int i, j;
 
dst = rte_pktmbuf_alloc(pool);
if (dst == NULL)
ERR_RETURN("Failed to allocate mbuf\n");
dst_data = rte_pktmbuf_mtod(dst, char *);
 
-   for (i = 0; i < RTE_DIM(lengths); i++) {
-   /* reset dst_data */
-   memset(dst_data, 0, rte_pktmbuf_data_len(dst));
+   for (k = 0; k < ARRAY_SIZE(pattern); k++) {
+   for (i = 0; i < RTE_DIM(lengths); i++) {
+   /* reset dst_data */
+   memset(dst_data, 0, rte_pktmbuf_data_len(dst));
+
+   /* perform the fill operation */
+   int id = rte_dma_fill(dev_id, vchan, pattern[k],
+   rte_pktmbuf_iova(dst), lengths[i], RTE_DMA_OP_FLAG_SUBMIT);
+   if (id < 0) {
+   if (id == -ENOTSUP) {
+   rte_pktmbuf_free(dst);
+   break;
+   }
+   ERR_RETURN("Error with rte_dma_fill\n");
+   }
+   await_hw(dev_id, vchan);
 
-   /* perform the fill operation */
-   int id = rte_dma_fill(dev_id, vchan, pattern,
-   rte_pktmbuf_iova(dst), lengths[i], RTE_DMA_OP_FLAG_SUBMIT);
-   if (id < 0)
-   ERR_RETURN("Error with rte_dma_fill\n");
-   await_hw(dev_id, vchan);
+   if (rte_dma_completed(dev_id, vchan, 1, NULL, NULL) != 1)
+   ERR_RETURN("Error: fill operation failed 
(length: %u)\n",
+  lengths[i]);
+   /* check the data from the fill operation is correct */
+   for (j = 0; j < lengths[i]; j++) {
+   char pat_byte = ((char *)&pattern[k])[j % 8];
 
-   if (rte_dma_completed(dev_id, vchan, 1, NULL, NULL) != 1)
-   ERR_RETURN("Error: fill operation failed (length: 
%u)\n", lengths[i]);
-   /* check the data from the fill operation is correct */
-   for (j = 0; j < lengths[i]; j++) {
-   char pat_byte = ((char *)&pattern)[j % 8];
-   if (dst_data[j] != pat_byte)
-   ERR_RETURN("Error with fill operation (lengths 
= %u): got (%x), not (%x)\n",
-   lengths[i], dst_data[j], 
pat_byte);
+   if (dst_data[j] != pat_byte)
+   ERR_RETURN("Error with fill operation 
(lengths = %u): got (%x), not (%x)\n",
+   lengths[i], 
dst_data[j], pat_byte);
+   }
+   /* check that the data after the fill operation was not written to */
+   for (; j < rte_pktmbuf_data_len(dst); j++)
+   if (dst_data[j] != 0)
+   ERR_RETURN("Error, fill operation wrote 
too far (lengths = %u): got (%x), not (%x)\n",
+   lengths[i], 
dst_data[j], 0);
}
-   /* check that the data after the fill operation was not written to */
-   for (; j < rte_pktmbuf_data_len(dst); j++)
-   if (dst_data[j] != 0)
-   ERR_RETURN("Error, fill operation wrote too far 
(lengths = %u): got (%x), not (%x)\n",
-   lengths[i], dst_data[j], 0);
}
 
rte_pktmbuf_free(dst);
-- 
2.25.1



Re: [v14 1/3] docs: AF_XDP Device Plugin

2024-04-19 Thread Ferruh Yigit
On 4/8/2024 2:09 PM, Maryam Tahhan wrote:
> diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst
> deleted file mode 100644
> index a1a6d5b99c..00
> --- a/doc/guides/howto/af_xdp_cni.rst
> +++ /dev/null
> @@ -1,253 +0,0 @@
> -.. SPDX-License-Identifier: BSD-3-Clause
> -   Copyright(c) 2023 Intel Corporation.
> -
> -Using a CNI with the AF_XDP driver
> -==================================
> -
> -Introduction
> -------------
> -
> -CNI, the Container Network Interface, is a technology for configuring
> -container network interfaces
> -and which can be used to setup Kubernetes networking.
> -AF_XDP is a Linux socket Address Family that enables an XDP program
> -to redirect packets to a memory buffer in userspace.
> -
> -This document explains how to enable the `AF_XDP Plugin for Kubernetes`_ within
> -a DPDK application using the :doc:`../nics/af_xdp` to connect and use these technologies.
> -
> -.. _AF_XDP Plugin for Kubernetes: https://github.com/intel/afxdp-plugins-for-kubernetes
> -

Hi Maryam, Ciara,

The 'AF_XDP Plugin for Kubernetes' above seems to be archived on GitHub.
Will it continue to be developed in the Red Hat clone?


Re: [v14 0/3] net/af_xdp: fix multi interface support for K8s

2024-04-19 Thread Ferruh Yigit
On 4/8/2024 2:19 PM, Loftus, Ciara wrote:
>> The original `use_cni` implementation was limited to
>> supporting only a single netdev in a DPDK pod. This patchset
>> aims to fix this limitation transparently to the end user.
>> It will also enable compatibility with the latest AF_XDP
>> Device Plugin.
>>
>> Signed-off-by: Maryam Tahhan 
> Thanks Maryam.
> 
> For the series,
> Acked-by: Ciara Loftus 
>

Series applied to dpdk-next-net/main, thanks.


Re: [PATCH v2] app/testpmd: fix lcore ID restriction

2024-04-19 Thread Ferruh Yigit
On 4/16/2024 10:55 AM, Sivaprasad Tummala wrote:
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index ba1007ace6..6b28c22c96 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -4785,9 +4785,9 @@ fwd_stream_on_other_lcores(uint16_t domain_id, lcoreid_t src_lc,
>   continue;
>   printf("Shared Rx queue group %u queue %hu can't be 
> scheduled on different cores:\n",
>  share_group, share_rxq);
> - printf("  lcore %hhu Port %hu queue %hu\n",
> + printf("  lcore %u Port %hu queue %hu\n",
>  src_lc, src_port, src_rxq);
> - printf("  lcore %hhu Port %hu queue %hu\n",
> + printf("  lcore %u Port %hu queue %hu\n",
>  lc_id, fs->rx_port, fs->rx_queue);
>   printf("Please use --nb-cores=%hu to limit number of 
> forwarding cores\n",
>  nb_rxq);
> @@ -5159,7 +5159,7 @@ icmp_echo_config_setup(void)
>   lcoreid_t lc_id;
>   uint16_t  sm_id;
>  
> - if ((nb_txq * nb_fwd_ports) < nb_fwd_lcores)
> + if ((lcoreid_t)(nb_txq * nb_fwd_ports) < nb_fwd_lcores)
>   cur_fwd_config.nb_fwd_lcores = (lcoreid_t)
>   (nb_txq * nb_fwd_ports);
>

Hi Sivaprasad,

Is this '(lcoreid_t)' cast required? Because of integer promotion I
think the result will be correct without the cast.

(And if integer promotion were not considered, the cast would need to be
applied to one of the variables, not to the result, because the result
may already have been truncated. Anyway, this is not required in this
case since the variables are u16.)
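
As a standalone illustration of that promotion rule (the types and values
here are made up, not taken from testpmd):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t nb_txq = 1024, nb_fwd_ports = 64;
        uint32_t nb_fwd_lcores = 70000;

        /* Both u16 operands are promoted to int before the multiply,
         * so the product (65536) is not truncated to 16 bits. */
        if (nb_txq * nb_fwd_ports < nb_fwd_lcores)
                printf("%d < %u, no cast needed\n",
                       nb_txq * nb_fwd_ports, nb_fwd_lcores);
        return 0;
}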


RE: [PATCH v3 1/7] net/ice: fix check for outer UDP checksum offload

2024-04-19 Thread Morten Brørup
For the series,
Acked-by: Morten Brørup 



RE: [RFC 2/6] eal/common: remove VLA warnings

2024-04-19 Thread Morten Brørup
> From: Konstantin Ananyev [mailto:konstantin.v.anan...@yandex.ru]
> Sent: Thursday, 18 April 2024 12.33
> 
> From: Konstantin Ananyev 
> 
> 1) ../lib/eal/common/eal_common_proc.c:695:15: warning: variable length array
> used [-Wvla]
> char control[CMSG_SPACE(fd_size)];
>  ^~~
> 
> As msg->num_fds should not exceed RTE_MP_MAX_FD_NUM, replaced
> it with fixed size array.
> 
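
A minimal sketch of that pattern (the RTE_MP_MAX_FD_NUM value below is a
placeholder for illustration, not the real definition):

#include <string.h>
#include <sys/socket.h>

#define RTE_MP_MAX_FD_NUM 8     /* placeholder value */

/* The VLA  char control[CMSG_SPACE(fd_size)]  becomes a buffer sized
 * for the worst case, since num_fds is already bounded. */
static void
send_fds_sketch(int num_fds)
{
        char control[CMSG_SPACE(sizeof(int) * RTE_MP_MAX_FD_NUM)];

        if (num_fds > RTE_MP_MAX_FD_NUM)
                return;
        memset(control, 0, sizeof(control));
        /* ... fill in cmsg headers and call sendmsg() as before ... */
}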
> Signed-off-by: Konstantin Ananyev 
> ---

Acked-by: Morten Brørup 



RE: [RFC 3/6] ethdev: remove VLA warnings

2024-04-19 Thread Morten Brørup
> From: Konstantin Ananyev [mailto:konstantin.v.anan...@yandex.ru]
> Sent: Thursday, 18 April 2024 12.33
> 
> From: Konstantin Ananyev 
> 
> 1) ./lib/ethdev/rte_ethdev.c:3244:16: warning: ISO C90 forbids variable length
> array ‘xstats_names’ [-Wvla]
> 2) ./lib/ethdev/rte_ethdev.c:3345:17: warning: ISO C90 forbids variable length
> array ‘ids_copy’ [-Wvla]
> 3) ./lib/ethdev/rte_ethdev.c:3538:16: warning: ISO C90 forbids variable length
> array ‘xstats’ [-Wvla]
> 4) ./lib/ethdev/rte_ethdev.c:3554:17: warning: ISO C90 forbids variable length
> array ‘ids_copy’ [-Wvla]
> 
> For 1) and 3) - just replaced VLA with arrays allocated from heap.
> As I understand xstats extraction belongs to control-path, so extra
> calloc/free is hopefully acceptable.
> Also ethdev xstats already doing that within
> rte_eth_xstats_get_id_by_name().

Getting names and getting values are two different things.
I would slightly prefer alloca() as VLA replacement for "xstats".
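
For illustration, the alloca() variant for a case like 3) could look roughly
like this (the type and bound are stand-ins, not the rte_ethdev.c code):

#include <alloca.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct xstat { uint64_t id; uint64_t value; };  /* stand-in type */

static void
get_xstats_sketch(size_t expected_entries)
{
        /* Same stack lifetime as the VLA it replaces, but no -Wvla;
         * expected_entries must stay bounded by the caller, as with
         * the VLA. */
        struct xstat *xstats = alloca(expected_entries * sizeof(*xstats));

        memset(xstats, 0, expected_entries * sizeof(*xstats));
        /* ... fill the entries and hand them back to the caller ... */
}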

> For 2) and 4) changed the code to use fixed size array and call
> appropriate devops function several times, if needed.
> 
> Signed-off-by: Konstantin Ananyev 
> ---

With or without suggested change...
Acked-by: Morten Brørup 

Thank you very much for your work on getting rid of VLAs, Konstantin!



RE: [RFC 4/6] hash: remove VLA warnings

2024-04-19 Thread Morten Brørup
> From: Konstantin Ananyev 
> 
> 1) ./lib/hash/rte_cuckoo_hash.c:2362:9: warning: ISO C90 forbids variable
> length array ‘positions’ [-Wvla]
> 2) ../lib/hash/rte_cuckoo_hash.c:2478:9: warning: ISO C90 forbids variable
> length array ‘positions’ [-Wvla]
> 
> Both rte_hash_lookup_bulk_data() and
> rte_hash_lookup_with_hash_bulk_data() expect
> @num_keys <= RTE_HASH_LOOKUP_BULK_MAX.
> So, for both cases it should be safe to replace VLA with fixed size
> array.
> 
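
A sketch of the replacement pattern (the bound's value is assumed here; the
real macro lives in rte_hash.h):

#include <errno.h>
#include <stdint.h>

#define RTE_HASH_LOOKUP_BULK_MAX 64     /* assumed for illustration */

static int
lookup_bulk_sketch(const void **keys, uint32_t num_keys, int32_t *ret_pos)
{
        /* fixed array replaces  int32_t positions[num_keys]  */
        int32_t positions[RTE_HASH_LOOKUP_BULK_MAX] = { 0 };
        uint32_t i;

        if (keys == NULL || num_keys == 0 ||
                        num_keys > RTE_HASH_LOOKUP_BULK_MAX)
                return -EINVAL;

        /* ... the bulk lookup fills positions[0..num_keys-1] ... */
        for (i = 0; i < num_keys; i++)
                ret_pos[i] = positions[i];
        return 0;
}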
> Signed-off-by: Konstantin Ananyev 
> ---

Acked-by: Morten Brørup 



RE: [RFC 5/6] hash/thash: remove VLA warnings

2024-04-19 Thread Morten Brørup
> @@ -771,7 +771,7 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
>   uint32_t desired_value, unsigned int attempts,
>   rte_thash_check_tuple_t fn, void *userdata)
>  {
> - uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
> + uint32_t tmp_tuple[RTE_THASH_MAX_L4_LEN];

Keep the "/ sizeof(uint32_t)" here, rather than folding it into 
RTE_THASH_MAX_L4_LEN.

At your preference, keep RTE_THASH_MAX_L4_LEN (removing "in dwords" from its
comment) or simply use:
uint32_t tmp_tuple[sizeof(union rte_thash_tuple) / sizeof(uint32_t)];

>   unsigned int i, j, ret = 0;
>   uint32_t hash, adj_bits;
>   const uint8_t *hash_key;
> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> index 30b657e67a..322fe3af66 100644
> --- a/lib/hash/rte_thash.h
> +++ b/lib/hash/rte_thash.h
> @@ -108,6 +108,14 @@ union rte_thash_tuple {
>   struct rte_ipv6_tuple   v6;
>  };
> 
> +/**
> + * maximum length in dwords of input tuple to
> + * calculate hash of ipv(4|6) header +
> + * transport header
> + */
> +#define RTE_THASH_MAX_L4_LEN \
> + ((sizeof(union rte_thash_tuple)) / sizeof(uint32_t))
> +
>  /**
>   * Prepare special converted key to use with rte_softrss_be()
>   * @param orig
> --
> 2.35.3

With sizeof(uint32_t) back in tmp_tuple...
Acked-by: Morten Brørup 



RE: [RFC 6/6] rcu: remove VLA warnings

2024-04-19 Thread Morten Brørup
> --- a/lib/rcu/rte_rcu_qsbr.h
> +++ b/lib/rcu/rte_rcu_qsbr.h
> @@ -86,6 +86,11 @@ struct __rte_cache_aligned rte_rcu_qsbr_cnt {
>  #define __RTE_QSBR_CNT_MAX ((uint64_t)~0)
>  #define __RTE_QSBR_TOKEN_SIZE sizeof(uint64_t)
> 
> +/**
> + * Max allowable size (in bytes) of each element in the defer queue
> + */
> +#define RTE_QSBR_ESIZE_MAX   (2 * RTE_CACHE_LINE_MIN_SIZE)

Consider moving this to /config/rte_config.h

With or without suggested change...
Acked-By: Morten Brørup 



RE: [RFC 1/6] eal/linux: remove VLA warnings

2024-04-19 Thread Morten Brørup
> From: Konstantin Ananyev 
> 
> 1) ./lib/eal/linux/eal_interrupts.c:1073:16: warning: ISO C90 forbids variable
> length array ‘events’ [-Wvla]
> 
> eal_intr_handle_interrupts() is called by eal_intr_thread_main()
> so it seems ok to simply alloc space for events from heap and reuse the
> same buffer through the life of the thread.
> 
> 2) ./lib/eal/linux/eal_interrupts.c:1319:16: warning: ISO C90 forbids variable
> length array ‘evs’ [-Wvla]
> 
> make eal_epoll_wait() to use fixed size array and use it through multiple
> iterations to process up to @maxevents events.
> Note that technically it is not one to one replacement, as here we might
> reduce number of events returned by first call to epoll_wait(..., timeout);
> 
> Signed-off-by: Konstantin Ananyev 
> ---

Acked-by: Morten Brørup 
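
For reference, the chunked pattern described in 2) above as a standalone
sketch (the names and chunk size are invented, not the eal_interrupts.c code):

#include <sys/epoll.h>

#define EVS_CHUNK 64    /* hypothetical fixed chunk size */

/* Process up to maxevents events with a fixed array instead of a VLA.
 * As noted above, only the first epoll_wait() honours the timeout;
 * later rounds only drain already-pending events. */
static int
epoll_wait_chunked(int epfd, unsigned int maxevents, int timeout)
{
        struct epoll_event evs[EVS_CHUNK];
        unsigned int total = 0, want;
        int n;

        do {
                want = maxevents - total;
                if (want > EVS_CHUNK)
                        want = EVS_CHUNK;
                n = epoll_wait(epfd, evs, want, timeout);
                if (n <= 0)
                        break;
                /* ... dispatch the n events in evs[] ... */
                total += n;
                timeout = 0;    /* do not block again */
        } while (total < maxevents && n == (int)want);

        return total;
}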



RE: [RFC 5/6] hash/thash: remove VLA warnings

2024-04-19 Thread Konstantin Ananyev


> > @@ -771,7 +771,7 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
> > uint32_t desired_value, unsigned int attempts,
> > rte_thash_check_tuple_t fn, void *userdata)
> >  {
> > -   uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
> > +   uint32_t tmp_tuple[RTE_THASH_MAX_L4_LEN];
> 
> Keep the "/ sizeof(uint32_t)" here, rather than folding it into 
> RTE_THASH_MAX_L4_LEN.
> 
> At your preference, keep RTE_THASH_MAX_L4_LEN (removing "in dwords" from it 
> comment) or simply use:
> uint32_t tmp_tuple[sizeof(union rte_thash_tuple) / sizeof(uint32_t)];

Not sure I got you here...
You are not happy with word "dwords" or ...?
Yes, it is size in 4B elems...
Same as other RTE_THASH_*_LEN macros in the same .h
 
> 
> > unsigned int i, j, ret = 0;
> > uint32_t hash, adj_bits;
> > const uint8_t *hash_key;
> > diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> > index 30b657e67a..322fe3af66 100644
> > --- a/lib/hash/rte_thash.h
> > +++ b/lib/hash/rte_thash.h
> > @@ -108,6 +108,14 @@ union rte_thash_tuple {
> > struct rte_ipv6_tuple   v6;
> >  };
> >
> > +/**
> > + * maximum length in dwords of input tuple to
> > + * calculate hash of ipv(4|6) header +
> > + * transport header
> > + */
> > +#define RTE_THASH_MAX_L4_LEN   \
> > +   ((sizeof(union rte_thash_tuple)) / sizeof(uint32_t))
> > +
> >  /**
> >   * Prepare special converted key to use with rte_softrss_be()
> >   * @param orig
> > --
> > 2.35.3
> 
> With sizeof(uint32_t) back in tmp_tuple...
> Acked-by: Morten Brørup 



RE: [RFC 5/6] hash/thash: remove VLA warnings

2024-04-19 Thread Morten Brørup
> > > @@ -771,7 +771,7 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
> > >   uint32_t desired_value, unsigned int attempts,
> > >   rte_thash_check_tuple_t fn, void *userdata)
> > >  {
> > > - uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
> > > + uint32_t tmp_tuple[RTE_THASH_MAX_L4_LEN];
> >
> > Keep the "/ sizeof(uint32_t)" here, rather than folding it into
> RTE_THASH_MAX_L4_LEN.
> >
> > At your preference, keep RTE_THASH_MAX_L4_LEN (removing "in dwords" from it
> comment) or simply use:
> > uint32_t tmp_tuple[sizeof(union rte_thash_tuple) / sizeof(uint32_t)];
> 
> Not sure I got you here...
> You are not happy with word "dwords" or ...?
> Yes, it is size in 4B elems...
> Same as other RTE_THASH_*_LEN macros in the same .h

Sorry, I missed those. I only looked at the .c file and the description of the 
tuple_len parameter in the .h file.

Then the RFC is Acked as is. :-)

> 
> >
> > >   unsigned int i, j, ret = 0;
> > >   uint32_t hash, adj_bits;
> > >   const uint8_t *hash_key;
> > > diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> > > index 30b657e67a..322fe3af66 100644
> > > --- a/lib/hash/rte_thash.h
> > > +++ b/lib/hash/rte_thash.h
> > > @@ -108,6 +108,14 @@ union rte_thash_tuple {
> > >   struct rte_ipv6_tuple   v6;
> > >  };
> > >
> > > +/**
> > > + * maximum length in dwords of input tuple to
> > > + * calculate hash of ipv(4|6) header +
> > > + * transport header
> > > + */
> > > +#define RTE_THASH_MAX_L4_LEN \
> > > + ((sizeof(union rte_thash_tuple)) / sizeof(uint32_t))
> > > +
> > >  /**
> > >   * Prepare special converted key to use with rte_softrss_be()
> > >   * @param orig
> > > --
> > > 2.35.3
> >
> > With sizeof(uint32_t) back in tmp_tuple...
> > Acked-by: Morten Brørup 



[PATCH v1] dts: remove the OS UDP test suite

2024-04-19 Thread Juraj Linkeš
The test suite served as a demonstration of the Scapy traffic generator
implementation. Now that we have a test suite that uses DPDK code (via
testpmd), there is no reason to keep the test suite, as there's no
expectation it'll be actually used in any setup.

Signed-off-by: Juraj Linkeš 
---
 dts/conf.yaml|  1 -
 dts/framework/config/conf_yaml_schema.json   |  1 -
 dts/framework/test_suite.py  | 36 -
 dts/framework/testbed_model/linux_session.py | 27 +-
 dts/framework/testbed_model/node.py  | 29 +--
 dts/framework/testbed_model/os_session.py| 36 +
 dts/framework/testbed_model/sut_node.py  |  9 
 dts/tests/TestSuite_os_udp.py| 53 
 8 files changed, 4 insertions(+), 188 deletions(-)
 delete mode 100644 dts/tests/TestSuite_os_udp.py

diff --git a/dts/conf.yaml b/dts/conf.yaml
index 8068345dd5..d61fb39303 100644
--- a/dts/conf.yaml
+++ b/dts/conf.yaml
@@ -16,7 +16,6 @@ executions:
 skip_smoke_tests: false # optional
 test_suites: # the following test suites will be run in their entirety
   - hello_world
-  - os_udp
 # The machine running the DPDK test executable
 system_under_test_node:
   node_name: "SUT 1"
diff --git a/dts/framework/config/conf_yaml_schema.json b/dts/framework/config/conf_yaml_schema.json
index 4731f4511d..105671a639 100644
--- a/dts/framework/config/conf_yaml_schema.json
+++ b/dts/framework/config/conf_yaml_schema.json
@@ -186,7 +186,6 @@
   "type": "string",
   "enum": [
 "hello_world",
-"os_udp",
 "pmd_buffer_scatter"
   ]
 },
diff --git a/dts/framework/test_suite.py b/dts/framework/test_suite.py
index 9c3b516002..9758a283de 100644
--- a/dts/framework/test_suite.py
+++ b/dts/framework/test_suite.py
@@ -139,42 +139,6 @@ def tear_down_test_case(self) -> None:
 This is done after *each* test case.
 """
 
-def configure_testbed_ipv4(self, restore: bool = False) -> None:
-"""Configure IPv4 addresses on all testbed ports.
-
-The configured ports are:
-
-* SUT ingress port,
-* SUT egress port,
-* TG ingress port,
-* TG egress port.
-
-Args:
-restore: If :data:`True`, will remove the configuration instead.
-"""
-delete = True if restore else False
-enable = False if restore else True
-self._configure_ipv4_forwarding(enable)
-self.sut_node.configure_port_ip_address(
-self._sut_ip_address_egress, self._sut_port_egress, delete
-)
-self.sut_node.configure_port_state(self._sut_port_egress, enable)
-self.sut_node.configure_port_ip_address(
-self._sut_ip_address_ingress, self._sut_port_ingress, delete
-)
-self.sut_node.configure_port_state(self._sut_port_ingress, enable)
-self.tg_node.configure_port_ip_address(
-self._tg_ip_address_ingress, self._tg_port_ingress, delete
-)
-self.tg_node.configure_port_state(self._tg_port_ingress, enable)
-self.tg_node.configure_port_ip_address(
-self._tg_ip_address_egress, self._tg_port_egress, delete
-)
-self.tg_node.configure_port_state(self._tg_port_egress, enable)
-
-def _configure_ipv4_forwarding(self, enable: bool) -> None:
-self.sut_node.configure_ipv4_forwarding(enable)
-
 def send_packet_and_capture(
 self,
 packet: Packet,
diff --git a/dts/framework/testbed_model/linux_session.py b/dts/framework/testbed_model/linux_session.py
index 5d24030c3d..e3fd0534ce 100644
--- a/dts/framework/testbed_model/linux_session.py
+++ b/dts/framework/testbed_model/linux_session.py
@@ -10,8 +10,7 @@
 """
 
 import json
-from ipaddress import IPv4Interface, IPv6Interface
-from typing import TypedDict, Union
+from typing import TypedDict
 
 from typing_extensions import NotRequired
 
@@ -179,25 +178,6 @@ def _update_port_attr(self, port: Port, attr_value: str | None, attr_name: str)
 f"Attempted to get '{attr_name}' of port {port.pci}, but it doesn't exist."
 )
 
-def configure_port_state(self, port: Port, enable: bool) -> None:
-"""Overrides :meth:`~.os_session.OSSession.configure_port_state`."""
-state = "up" if enable else "down"
-self.send_command(f"ip link set dev {port.logical_name} {state}", 
privileged=True)
-
-def configure_port_ip_address(
-self,
-address: Union[IPv4Interface, IPv6Interface],
-port: Port,
-delete: bool,
-) -> None:
-"""Overrides 
:meth:`~.os_session.OSSession.configure_port_ip_address`."""
-command = "del" if delete else "add"
-self.send_command(
-f"ip address {command} {address} dev {port.logical_name}",
-privileged=True,
-verify=True,
-)
-
 def configure_port_mtu(self, mtu: int, port: Po

Re: [PATCH v3 0/2] Wangxun support vector Rx/Tx

2024-04-19 Thread Ferruh Yigit
On 4/19/2024 10:04 AM, Jiawen Wu wrote:
> Add SSE/NEON vector instructions for TXGBE and NGBE driver to process
> packets.
> 
> v3:
> - Update release note.
> - Use spaces instead of tab in meson.build.
> 
> v2:
> - Add performance test results.
> - Cleanup codes and rebase.
> - Remove GCC "-Wcast-qual".
> 
> Jiawen Wu (2):
>   net/txgbe: add vectorized functions for Rx/Tx
>   net/ngbe: add vectorized functions for Rx/Tx
>

Series applied to dpdk-next-net/main, thanks.


Re: [PATCH v2 00/83] move alignment attribute on types

2024-04-19 Thread David Marchand
Hello Tyler,

On Mon, Apr 15, 2024 at 10:05 PM Tyler Retzlaff
 wrote:
>
> The current location used for __rte_aligned(a) for alignment of types
> and variables is not compatible with MSVC. There is only a single
> location accepted by both toolchains.
>
> After having established this as the conventional standard for lib/*
> this series is intended to convert the remainder of the source tree to
> use the same location for __rte_aligned(a) and alignas(a) for
> consistency.
>
> v2:
>   * examples/ipsec remove alignment of pointers to size of a pointer
> explicit alignment is unnecessary.
>
> Tyler Retzlaff (83):
>   examples: move alignment attribute on types
>   net/ark: move alignment attribute on types
>   net/avp: move alignment attribute on types
>   net/axgbe: move alignment attribute on types
>   net/bnxt: move alignment attribute on types
>   net/bonding: move alignment attribute on types
>   net/cxgbe: move alignment attribute on types
>   net/e1000: move alignment attribute on types
>   net/ena: move alignment attribute on types
>   net/enic: move alignment attribute on types
>   net/fm10k: move alignment attribute on types
>   net/hinic: move alignment attribute on types
>   net/hns3: move alignment attribute on types
>   net/i40e: move alignment attribute on types
>   net/iavf: move alignment attribute on types
>   net/ice: move alignment attribute on types
>   net/igc: move alignment attribute on types
>   net/ionic: move alignment attribute on types
>   net/ixgbe: move alignment attribute on types
>   net/memif: move alignment attribute on types
>   net/mlx5: move alignment attribute on types
>   net/mlx4: move alignment attribute on types
>   net/mvpp2: move alignment attribute on types
>   net/netvsc: move alignment attribute on types
>   net/nfp: move alignment attribute on types
>   net/ngbe: move alignment attribute on types
>   net/octeontx: move alignment attribute on types
>   net/pfe: move alignment attribute on types
>   net/qede: move alignment attribute on types
>   net/softnic: move alignment attribute on types
>   net/tap: move alignment attribute on types
>   net/thunderx: move alignment attribute on types
>   net/txgbe: move alignment attribute on types
>   net/virtio: move alignment attribute on types
>   vdpa/mlx5: move alignment attribute on types
>   regex/cn9k: move alignment attribute on types
>   raw/ntb: move alignment attribute on types
>   ml/cnxk: move alignment attribute on types
>   mempool/cnxk: move alignment attribute on types
>   event/sw: move alignment attribute on types
>   event/skeleton: move alignment attribute on types
>   event/opdl: move alignment attribute on types
>   event/octeontx: move alignment attribute on types
>   event/dsw: move alignment attribute on types
>   event/dlb2: move alignment attribute on types
>   event/cnxk: move alignment attribute on types
>   dma/skeleton: move alignment attribute on types
>   dma/ioat: move alignment attribute on types
>   dma/idxd: move alignment attribute on types
>   crypto/uadk: move alignment attribute on types
>   crypto/scheduler: move alignment attribute on types
>   crypto/qat: move alignment attribute on types
>   crypto/openssl: move alignment attribute on types
>   crypto/octeontx: move alignment attribute on types
>   crypto/null: move alignment attribute on types
>   crypto/mvsam: move alignment attribute on types
>   crypto/mlx5: move alignment attribute on types
>   crypto/ipsec_mb: move alignment attribute on types
>   crypto/cnxk: move alignment attribute on types
>   crypto/ccp: move alignment attribute on types
>   crypto/caam_jr: move alignment attribute on types
>   crypto/bcmfs: move alignment attribute on types
>   crypto/armv8: move alignment attribute on types
>   compress/zlib: move alignment attribute on types
>   compress/qat: move alignment attribute on types
>   compress/octeontx: move alignment attribute on types
>   compress/nitrox: move alignment attribute on types
>   compress/isal: move alignment attribute on types
>   common/qat: move alignment attribute on types
>   common/mlx5: move alignment attribute on types
>   common/idpf: move alignment attribute on types
>   common/cpt: move alignment attribute on types
>   bus/fslmc: move alignment attribute on types
>   baseband/turbo_sw: move alignment attribute on types
>   baseband/null: move alignment attribute on types
>   app/test: move alignment attribute on types
>   app/test-pipeline: move alignment attribute on types
>   app/test-mldev: move alignment attribute on types
>   app/test-flow-perf: move alignment attribute on types
>   app/test-eventdev: move alignment attribute on types
>   app/pdump: move alignment attribute on types
>   app/graph: move alignment attribute on types
>   bus/dpaa: move alignment attribute on types
>
>  app/graph/ethdev_rx.h  |  4 +-
>  app/graph/ethdev_rx_priv.h |  4 +-
>  app/pdump/main.c   |  4 +-
>  a

Re: [PATCH v7] ethdev: fix strict aliasing lead to link cannot be up

2024-04-19 Thread Ferruh Yigit
On 4/18/2024 8:28 AM, Chengwen Feng wrote:
> @@ -1701,12 +1696,10 @@ static inline void
>  rte_eth_linkstatus_get(const struct rte_eth_dev *dev,
>  struct rte_eth_link *link)
>  {
> - RTE_ATOMIC(uint64_t) *src = (uint64_t __rte_atomic *)&(dev->data->dev_link);
> - uint64_t *dst = (uint64_t *)link;
> -
> - RTE_BUILD_BUG_ON(sizeof(*link) != sizeof(uint64_t));
> -
> - *dst = rte_atomic_load_explicit(src, rte_memory_order_seq_cst);
> + rte_atomic_store_explicit(&link->val64,
> +   rte_atomic_load_explicit(&dev->data->dev_link.val64,
> +   rte_memory_order_seq_cst),
> +   rte_memory_order_seq_cst);
>

Hi Chengwen,

Overall looks good to me, but to increase readability what do you think
of extracting the function call from the function param, like
```
struct rte_eth_link new_link;
new_link.val64 = rte_atomic_load_explicit(..dev_link.val64, ..);
rte_atomic_store_explicit(&link->val64, new_link.val64, ..);
```


Re: [PATCH v7] ethdev: fix strict aliasing lead to link cannot be up

2024-04-19 Thread Ferruh Yigit
On 4/18/2024 8:28 AM, Chengwen Feng wrote:
> Fix a problem introduced by a compiler upgrade (from gcc10 to gcc12.3),
> which caused the hns3 NIC to be unable to link up. The root cause is a
> strict aliasing violation in rte_eth_linkstatus_set() with the hns3
> driver; see [1] for more details.
> 
> This commit uses a union to avoid such an aliasing violation.
> 
> Note: DPDK CI report compiler error (see [2] for more details):
> ../drivers/net/cxgbe/cxgbe_ethdev.c:214:9: error: missing braces around
> initializer [-Werror=missing-braces]
>   struct rte_eth_link new_link = { 0 };
> The same error with qos_sched example:
> ../examples/qos_sched/init.c:338:10: error: missing braces around
> initializer [-Werror=missing-braces]
>struct rte_eth_link link = {0};
> So this commit replaces { 0 } with memset in cxgbe and qos_sched.
> 

As this commit is already fixing the build errors, I am not sure there is
value in providing a reference to the errors; you can briefly describe the
change with something like:
"The impacted components have been adapted to the struct change."


> [1] Strict aliasing problem with rte_eth_linkstatus_set()
> https://marc.info/?l=dpdk-dev&m=171274148514777&w=3
>

I wasn't aware of marc.info, but for consistency you can use the DPDK mail
list archive, inbox.dpdk.org, like:
https://inbox.dpdk.org/dev/8175c905-e661-b910-7f20-59b6ab605...@huawei.com/

> [2] https://mails.dpdk.org/archives/test-report/2024-April/637966.html
> 
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Chengwen Feng 
> Signed-off-by: Dengdui Huang 
> Acked-by: Morten Brørup 
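
A sketch of the union shape implied by the quoted diff (the bitfield names
are abbreviated from memory here, so treat them as illustrative only):

#include <stdint.h>

/* The link status bitfields share storage with a uint64_t view, so
 * rte_eth_linkstatus_get/set can do a single atomic 64-bit load/store
 * on val64 without type-punning pointer casts. */
struct eth_link_sketch {
        union {
                uint64_t val64; /* used for the atomic accesses */
                struct {
                        uint32_t link_speed;
                        uint16_t link_duplex  : 1;
                        uint16_t link_autoneg : 1;
                        uint16_t link_status  : 1;
                };
        };
};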



[PATCH v4 0/6] latencystats: cleanup

2024-04-19 Thread Stephen Hemminger
Latencystats uses a variable length array and floating point when they
are not necessary to achieve the same result. While testing, I also noticed
that the code was computing wrong values on my test system, and that
include files were missing.

v4 - review feedback and fix pedantic warnings

Stephen Hemminger (6):
  latencystats: replace use of VLA
  latencystats: handle fractional cycles per ns
  latencystats: do not use floating point
  latencystats: fix log messages
  latencystats: update include files
  latencystats: fix for pedantic warnings

 lib/latencystats/rte_latencystats.c | 182 +++-
 1 file changed, 96 insertions(+), 86 deletions(-)

-- 
2.43.0



[PATCH v4 1/6] latencystats: replace use of VLA

2024-04-19 Thread Stephen Hemminger
The temporary array latencystats is not needed if the algorithm
is converted into one pass.

Signed-off-by: Stephen Hemminger 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 
---
 lib/latencystats/rte_latencystats.c | 31 +++--
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 4ea9b0d75b..9b345bfb33 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -157,9 +157,9 @@ calc_latency(uint16_t pid __rte_unused,
uint16_t nb_pkts,
void *_ __rte_unused)
 {
-   unsigned int i, cnt = 0;
+   unsigned int i;
uint64_t now;
-   float latency[nb_pkts];
+   float latency;
static float prev_latency;
/*
 * Alpha represents degree of weighting decrease in EWMA,
@@ -169,13 +169,14 @@ calc_latency(uint16_t pid __rte_unused,
const float alpha = 0.2;
 
now = rte_rdtsc();
-   for (i = 0; i < nb_pkts; i++) {
-   if (pkts[i]->ol_flags & timestamp_dynflag)
-   latency[cnt++] = now - *timestamp_dynfield(pkts[i]);
-   }
 
rte_spinlock_lock(&glob_stats->lock);
-   for (i = 0; i < cnt; i++) {
+   for (i = 0; i < nb_pkts; i++) {
+   if (!(pkts[i]->ol_flags & timestamp_dynflag))
+   continue;
+
+   latency = now - *timestamp_dynfield(pkts[i]);
+
/*
 * The jitter is calculated as statistical mean of interpacket
 * delay variation. The "jitter estimate" is computed by taking
@@ -187,22 +188,22 @@ calc_latency(uint16_t pid __rte_unused,
 * Reference: Calculated as per RFC 5481, sec 4.1,
 * RFC 3393 sec 4.5, RFC 1889 sec.
 */
-   glob_stats->jitter +=  (fabsf(prev_latency - latency[i])
+   glob_stats->jitter +=  (fabsf(prev_latency - latency)
- glob_stats->jitter)/16;
if (glob_stats->min_latency == 0)
-   glob_stats->min_latency = latency[i];
-   else if (latency[i] < glob_stats->min_latency)
-   glob_stats->min_latency = latency[i];
-   else if (latency[i] > glob_stats->max_latency)
-   glob_stats->max_latency = latency[i];
+   glob_stats->min_latency = latency;
+   else if (latency < glob_stats->min_latency)
+   glob_stats->min_latency = latency;
+   else if (latency > glob_stats->max_latency)
+   glob_stats->max_latency = latency;
/*
 * The average latency is measured using exponential moving
 * average, i.e. using EWMA
 * https://en.wikipedia.org/wiki/Moving_average
 */
glob_stats->avg_latency +=
-   alpha * (latency[i] - glob_stats->avg_latency);
-   prev_latency = latency[i];
+   alpha * (latency - glob_stats->avg_latency);
+   prev_latency = latency;
}
rte_spinlock_unlock(&glob_stats->lock);
 
-- 
2.43.0



[PATCH v4 2/6] latencystats: handle fractional cycles per ns

2024-04-19 Thread Stephen Hemminger
The timer frequency is not always an integral number of cycles per
nanosecond. For example, cycles per nanosecond on my test system is 2.8.
Fix by using floating point where needed and calculate the value once.
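
Worked example of the truncation, assuming a hypothetical 2.8 GHz TSC:

#include <stdio.h>

int main(void)
{
        unsigned long hz = 2800000000UL;        /* 2.8 GHz timer */

        unsigned long old_per_ns = hz / 1000000000UL;   /* truncates to 2 */
        double new_per_ns = (double)hz / 1E9;           /* keeps 2.8 */

        /* a 2800-cycle latency should report as 1000 ns, not 1400 ns */
        printf("old: %lu ns, new: %.0f ns\n",
               2800 / old_per_ns, 2800 / new_per_ns);
        return 0;
}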

Signed-off-by: Stephen Hemminger 
---
 lib/latencystats/rte_latencystats.c | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 9b345bfb33..55a099c818 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -18,12 +18,7 @@
 /** Nano seconds per second */
 #define NS_PER_SEC 1E9
 
-/** Clock cycles per nano second */
-static uint64_t
-latencystat_cycles_per_ns(void)
-{
-   return rte_get_timer_hz() / NS_PER_SEC;
-}
+static double cycles_per_ns;
 
 RTE_LOG_REGISTER_DEFAULT(latencystat_logtype, INFO);
 #define RTE_LOGTYPE_LATENCY_STATS latencystat_logtype
@@ -89,8 +84,7 @@ rte_latencystats_update(void)
for (i = 0; i < NUM_LATENCY_STATS; i++) {
stats_ptr = RTE_PTR_ADD(glob_stats,
lat_stats_strings[i].offset);
-   values[i] = (uint64_t)floor((*stats_ptr)/
-   latencystat_cycles_per_ns());
+   values[i] = floor(*stats_ptr / cycles_per_ns);
}
 
ret = rte_metrics_update_values(RTE_METRICS_GLOBAL,
@@ -112,8 +106,7 @@ rte_latencystats_fill_values(struct rte_metric_value *values)
stats_ptr = RTE_PTR_ADD(glob_stats,
lat_stats_strings[i].offset);
values[i].key = i;
-   values[i].value = (uint64_t)floor((*stats_ptr)/
-   latencystat_cycles_per_ns());
+   values[i].value = floor(*stats_ptr / cycles_per_ns);
}
 }
 
@@ -235,9 +228,11 @@ rte_latencystats_init(uint64_t app_samp_intvl,
return -ENOMEM;
}
 
+   cycles_per_ns = (double)rte_get_timer_hz() / NS_PER_SEC;
+
glob_stats = mz->addr;
rte_spinlock_init(&glob_stats->lock);
-   samp_intvl = app_samp_intvl * latencystat_cycles_per_ns();
+   samp_intvl = (uint64_t)(app_samp_intvl * cycles_per_ns);
 
/** Register latency stats with stats library */
for (i = 0; i < NUM_LATENCY_STATS; i++)
-- 
2.43.0



[PATCH v4 3/6] latencystats: do not use floating point

2024-04-19 Thread Stephen Hemminger
The cycle counts do not need to be stored as floating point.
Instead keep track of latency in cycles, and convert to
nanoseconds when read.

Change Exponential Weighted Moving Average weight from .2 to .25
to avoid use of floating point for that.

The average latency took too long to "warm up".
Do what RFC 6298 suggests and initialize on first sample.
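
The weight change makes the average expressible in pure integer math; as a
sketch (illustrative only, the actual hunk is truncated in this digest):

#include <stdint.h>

/* EWMA with alpha = 0.25: avg += (sample - avg) / 4. The signed
 * difference handles sample < avg, and the integer division by 4
 * replaces the previous floating point multiply by 0.2. */
static inline uint64_t
ewma_update(uint64_t avg, uint64_t sample)
{
        int64_t diff = (int64_t)sample - (int64_t)avg;

        return avg + diff / 4;
}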

Signed-off-by: Stephen Hemminger 
Acked-by: Tyler Retzlaff 
---
 lib/latencystats/rte_latencystats.c | 88 +++--
 1 file changed, 45 insertions(+), 43 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 55a099c818..6ef8e344bf 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -4,6 +4,7 @@
 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -42,10 +43,10 @@ static uint64_t timer_tsc;
 static uint64_t prev_tsc;
 
 struct rte_latency_stats {
-   float min_latency; /**< Minimum latency in nano seconds */
-   float avg_latency; /**< Average latency in nano seconds */
-   float max_latency; /**< Maximum latency in nano seconds */
-   float jitter; /** Latency variation */
+   uint64_t min_latency; /**< Minimum latency */
+   uint64_t avg_latency; /**< Average latency */
+   uint64_t max_latency; /**< Maximum latency */
+   uint64_t jitter; /** Latency variation */
rte_spinlock_t lock; /** Latency calculation lock */
 };
 
@@ -77,13 +78,12 @@ int32_t
 rte_latencystats_update(void)
 {
unsigned int i;
-   float *stats_ptr = NULL;
uint64_t values[NUM_LATENCY_STATS] = {0};
int ret;
 
for (i = 0; i < NUM_LATENCY_STATS; i++) {
-   stats_ptr = RTE_PTR_ADD(glob_stats,
-   lat_stats_strings[i].offset);
+   const uint64_t *stats_ptr
+   = RTE_PTR_ADD(glob_stats, lat_stats_strings[i].offset);
values[i] = floor(*stats_ptr / cycles_per_ns);
}
 
@@ -100,11 +100,10 @@ static void
 rte_latencystats_fill_values(struct rte_metric_value *values)
 {
unsigned int i;
-   float *stats_ptr = NULL;
 
for (i = 0; i < NUM_LATENCY_STATS; i++) {
-   stats_ptr = RTE_PTR_ADD(glob_stats,
-   lat_stats_strings[i].offset);
+   const uint64_t *stats_ptr
+   = RTE_PTR_ADD(glob_stats, lat_stats_strings[i].offset);
values[i].key = i;
values[i].value = floor(*stats_ptr / cycles_per_ns);
}
@@ -151,15 +150,9 @@ calc_latency(uint16_t pid __rte_unused,
void *_ __rte_unused)
 {
unsigned int i;
-   uint64_t now;
-   float latency;
-   static float prev_latency;
-   /*
-* Alpha represents degree of weighting decrease in EWMA,
-* a constant smoothing factor between 0 and 1. The value
-* is used below for measuring average latency.
-*/
-   const float alpha = 0.2;
+   uint64_t now, latency;
+   static uint64_t prev_latency;
+   static bool first_sample = true;
 
now = rte_rdtsc();
 
@@ -170,32 +163,41 @@ calc_latency(uint16_t pid __rte_unused,
 
latency = now - *timestamp_dynfield(pkts[i]);
 
-   /*
-* The jitter is calculated as statistical mean of interpacket
-* delay variation. The "jitter estimate" is computed by taking
-* the absolute values of the ipdv sequence and applying an
-* exponential filter with parameter 1/16 to generate the
-* estimate. i.e J=J+(|D(i-1,i)|-J)/16. Where J is jitter,
-* D(i-1,i) is difference in latency of two consecutive packets
-* i-1 and i.
-* Reference: Calculated as per RFC 5481, sec 4.1,
-* RFC 3393 sec 4.5, RFC 1889 sec.
-*/
-   glob_stats->jitter +=  (fabsf(prev_latency - latency)
-   - glob_stats->jitter)/16;
-   if (glob_stats->min_latency == 0)
-   glob_stats->min_latency = latency;
-   else if (latency < glob_stats->min_latency)
+   if (unlikely(first_sample)) {
+   first_sample = false;
+
glob_stats->min_latency = latency;
-   else if (latency > glob_stats->max_latency)
glob_stats->max_latency = latency;
-   /*
-* The average latency is measured using exponential moving
-* average, i.e. using EWMA
-* https://en.wikipedia.org/wiki/Moving_average
-*/
-   glob_stats->avg_latency +=
-   alpha * (latency - glob_stats->avg_latency);
+   glob_stats->avg_latency = latency;
+   glob_stats->jitter = latency / 2;
+   } else {
+ 

[PATCH v4 4/6] latencystats: fix log messages

2024-04-19 Thread Stephen Hemminger
All messages that result from an error should be at log level
NOTICE or above. Do not break log messages across lines.

Signed-off-by: Stephen Hemminger 
Acked-by: Tyler Retzlaff 
---
 lib/latencystats/rte_latencystats.c | 30 ++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 6ef8e344bf..8aff96a449 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -243,7 +243,7 @@ rte_latencystats_init(uint64_t app_samp_intvl,
latency_stats_index = rte_metrics_reg_names(ptr_strings,
NUM_LATENCY_STATS);
if (latency_stats_index < 0) {
-   LATENCY_STATS_LOG(DEBUG,
+   LATENCY_STATS_LOG(ERR,
"Failed to register latency stats names");
return -1;
}
@@ -263,7 +263,7 @@ rte_latencystats_init(uint64_t app_samp_intvl,
 
ret = rte_eth_dev_info_get(pid, &dev_info);
if (ret != 0) {
-   LATENCY_STATS_LOG(INFO,
+   LATENCY_STATS_LOG(NOTICE,
"Error during getting device (port %u) info: 
%s",
pid, strerror(-ret));
 
@@ -275,18 +275,18 @@ rte_latencystats_init(uint64_t app_samp_intvl,
cbs->cb = rte_eth_add_first_rx_callback(pid, qid,
add_time_stamps, user_cb);
if (!cbs->cb)
-   LATENCY_STATS_LOG(INFO, "Failed to "
-   "register Rx callback for pid=%d, "
-   "qid=%d", pid, qid);
+   LATENCY_STATS_LOG(NOTICE,
+   "Failed to register Rx callback for 
pid=%u, qid=%u",
+   pid, qid);
}
for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
cbs = &tx_cbs[pid][qid];
cbs->cb =  rte_eth_add_tx_callback(pid, qid,
calc_latency, user_cb);
if (!cbs->cb)
-   LATENCY_STATS_LOG(INFO, "Failed to "
-   "register Tx callback for pid=%d, "
-   "qid=%d", pid, qid);
+   LATENCY_STATS_LOG(NOTICE,
+   "Failed to register Tx callback for 
pid=%u, qid=%u",
+   pid, qid);
}
}
return 0;
@@ -307,7 +307,7 @@ rte_latencystats_uninit(void)
 
ret = rte_eth_dev_info_get(pid, &dev_info);
if (ret != 0) {
-   LATENCY_STATS_LOG(INFO,
+   LATENCY_STATS_LOG(NOTICE,
"Error during getting device (port %u) info: 
%s",
pid, strerror(-ret));
 
@@ -318,17 +318,17 @@ rte_latencystats_uninit(void)
cbs = &rx_cbs[pid][qid];
ret = rte_eth_remove_rx_callback(pid, qid, cbs->cb);
if (ret)
-   LATENCY_STATS_LOG(INFO, "failed to "
-   "remove Rx callback for pid=%d, "
-   "qid=%d", pid, qid);
+   LATENCY_STATS_LOG(NOTICE,
+   "Failed to remove Rx callback for 
pid=%u, qid=%u",
+   pid, qid);
}
for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
cbs = &tx_cbs[pid][qid];
ret = rte_eth_remove_tx_callback(pid, qid, cbs->cb);
if (ret)
-   LATENCY_STATS_LOG(INFO, "failed to "
-   "remove Tx callback for pid=%d, "
-   "qid=%d", pid, qid);
+   LATENCY_STATS_LOG(NOTICE,
+   "Failed to remove Tx callback for 
pid=%u, qid=%u",
+   pid, qid);
}
}
 
-- 
2.43.0



[PATCH v4 5/6] latencystats: update include files

2024-04-19 Thread Stephen Hemminger
Include what is used here.

Signed-off-by: Stephen Hemminger 
---
 lib/latencystats/rte_latencystats.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 8aff96a449..5db896ac7b 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -2,17 +2,25 @@
  * Copyright(c) 2018 Intel Corporation
  */
 
+#include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
-#include 
-#include 
-#include 
 #include 
+#include 
+#include 
 #include 
-#include 
-#include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "rte_latencystats.h"
 
-- 
2.43.0



[PATCH v4 6/6] latencystats: fix for pedantic warnings

2024-04-19 Thread Stephen Hemminger
ISO C does not allow casting a function pointer to void *.
Resolve by enforcing the reserved argument.
The user_cb argument for rte_latencystats_init() was not
implemented, and had to be NULL anyway.

The log type is local to this file and RTE_LOG_REGISTER_DEFAULT
already has a semicolon. So adding one here causes a warning
if the compiler is set to pedantic.
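
The warning in question, reduced to a standalone example:

#include <stdio.h>

static void handler(void) { puts("called"); }

int main(void)
{
        /* ISO C defines no conversion between function pointers and
         * void *; gcc/clang warn on the next line under -pedantic. */
        void *p = (void *)handler;

        (void)p;
        return 0;
}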

Signed-off-by: Stephen Hemminger 
---
 lib/latencystats/rte_latencystats.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c
index 5db896ac7b..f60c893199 100644
--- a/lib/latencystats/rte_latencystats.c
+++ b/lib/latencystats/rte_latencystats.c
@@ -29,7 +29,7 @@
 
 static double cycles_per_ns;
 
-RTE_LOG_REGISTER_DEFAULT(latencystat_logtype, INFO);
+static RTE_LOG_REGISTER_DEFAULT(latencystat_logtype, INFO)
 #define RTE_LOGTYPE_LATENCY_STATS latencystat_logtype
 #define LATENCY_STATS_LOG(level, ...) \
RTE_LOG_LINE(level, LATENCY_STATS, "" __VA_ARGS__)
@@ -229,6 +229,10 @@ rte_latencystats_init(uint64_t app_samp_intvl,
if (rte_memzone_lookup(MZ_RTE_LATENCY_STATS))
return -EEXIST;
 
+   /** Reserved for possible future use */
+   if (user_cb != NULL)
+   return -ENOTSUP;
+
/** Allocate stats in shared memory fo multi process support */
mz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats),
rte_socket_id(), flags);
@@ -281,7 +285,7 @@ rte_latencystats_init(uint64_t app_samp_intvl,
for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
cbs = &rx_cbs[pid][qid];
cbs->cb = rte_eth_add_first_rx_callback(pid, qid,
-   add_time_stamps, user_cb);
+   add_time_stamps, NULL);
if (!cbs->cb)
LATENCY_STATS_LOG(NOTICE,
"Failed to register Rx callback for 
pid=%u, qid=%u",
@@ -290,7 +294,7 @@ rte_latencystats_init(uint64_t app_samp_intvl,
for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
cbs = &tx_cbs[pid][qid];
cbs->cb =  rte_eth_add_tx_callback(pid, qid,
-   calc_latency, user_cb);
+   calc_latency, NULL);
if (!cbs->cb)
LATENCY_STATS_LOG(NOTICE,
"Failed to register Tx callback for 
pid=%u, qid=%u",
-- 
2.43.0



RE: [PATCH v4 3/6] latencystats: do not use floating point

2024-04-19 Thread Morten Brørup
> + if (unlikely(first_sample)) {
> + first_sample = false;
> +
>   glob_stats->min_latency = latency;
> - else if (latency > glob_stats->max_latency)
>   glob_stats->max_latency = latency;
> - /*
> -  * The average latency is measured using exponential moving
> -  * average, i.e. using EWMA
> -  * https://en.wikipedia.org/wiki/Moving_average
> -  */
> - glob_stats->avg_latency +=
> - alpha * (latency - glob_stats->avg_latency);
> + glob_stats->avg_latency = latency;
> + glob_stats->jitter = latency / 2;

Setting jitter at first sample as latency / 2 is wrong.
Jitter should remain zero at first sample.

> + } else {
> + /*
> +  * The jitter is calculated as statistical mean of interpacket
> +  * delay variation. The "jitter estimate" is computed by taking
> +  * the absolute values of the ipdv sequence and applying an
> +  * exponential filter with parameter 1/16 to generate the
> +  * estimate. i.e J=J+(|D(i-1,i)|-J)/16. Where J is jitter,
> +  * D(i-1,i) is difference in latency of two consecutive packets
> +  * i-1 and i.
> +  * Reference: Calculated as per RFC 5481, sec 4.1,
> +  * RFC 3393 sec 4.5, RFC 1889 sec.
> +  */
> + glob_stats->jitter += ((prev_latency - latency)
> +         - glob_stats->jitter) / 16;

With jitter remaining zero at first sample,
Acked-by: Morten Brørup 



[PATCH 0/6] crypto/ionic: introduce AMD Pensando ionic crypto driver

2024-04-19 Thread Andrew Boyer
This patchset introduces a new crypto PMD for AMD Pensando hardware
accelerators. It allows applications running directly on the AMD Pensando
DSC to offload cryptographic operations to hardware cryptographic blocks.

Andrew Boyer (6):
  crypto/ionic: introduce AMD Pensando ionic crypto driver
  crypto/ionic: add device and admin command handlers
  common/ionic: add crypto vdev support
  crypto/ionic: add device object and vdev support
  crypto/ionic: add datapath and capabilities support
  crypto/ionic: add documentation and connect to build

 MAINTAINERS  |7 +
 doc/guides/cryptodevs/features/ionic.ini |   40 +
 doc/guides/cryptodevs/index.rst  |1 +
 doc/guides/cryptodevs/ionic.rst  |   39 +
 drivers/common/ionic/ionic_common.h  |2 +
 drivers/common/ionic/ionic_common_uio.c  |   48 +-
 drivers/common/ionic/version.map |1 +
 drivers/crypto/ionic/ionic_crypto.h  |  361 
 drivers/crypto/ionic/ionic_crypto_caps.c |   55 ++
 drivers/crypto/ionic/ionic_crypto_cmds.c |  651 ++
 drivers/crypto/ionic/ionic_crypto_if.h   | 1021 ++
 drivers/crypto/ionic/ionic_crypto_main.c |  993 +
 drivers/crypto/ionic/ionic_crypto_ops.c  |  606 +
 drivers/crypto/ionic/ionic_crypto_vdev.c |  128 +++
 drivers/crypto/ionic/meson.build |   16 +
 drivers/crypto/meson.build   |1 +
 16 files changed, 3969 insertions(+), 1 deletion(-)
 create mode 100644 doc/guides/cryptodevs/features/ionic.ini
 create mode 100644 doc/guides/cryptodevs/ionic.rst
 create mode 100644 drivers/crypto/ionic/ionic_crypto.h
 create mode 100644 drivers/crypto/ionic/ionic_crypto_caps.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_cmds.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_if.h
 create mode 100644 drivers/crypto/ionic/ionic_crypto_main.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_ops.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_vdev.c
 create mode 100644 drivers/crypto/ionic/meson.build

-- 
2.17.1



[PATCH 1/6] crypto/ionic: introduce AMD Pensando ionic crypto driver

2024-04-19 Thread Andrew Boyer
Introduce a new crypto PMD for AMD Pensando hardware accelerators. It
allows applications running directly on the AMD Pensando DSC to offload
cryptographic operations to hardware cryptographic blocks.

This commit adds the firmware interface definition file.

Signed-off-by: Andrew Boyer 
---
 drivers/crypto/ionic/ionic_crypto_if.h | 1021 
 1 file changed, 1021 insertions(+)
 create mode 100644 drivers/crypto/ionic/ionic_crypto_if.h

diff --git a/drivers/crypto/ionic/ionic_crypto_if.h b/drivers/crypto/ionic/ionic_crypto_if.h
new file mode 100644
index 00..ea418f3d4b
--- /dev/null
+++ b/drivers/crypto/ionic/ionic_crypto_if.h
@@ -0,0 +1,1021 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2021-2024 Advanced Micro Devices, Inc.
+ */
+
+#ifndef _IONIC_CRYPTO_IF_H_
+#define _IONIC_CRYPTO_IF_H_
+
+#define IOCPT_DEV_INFO_SIGNATURE   0x43585660  /* 'CRPT' */
+#define IOCPT_DEV_INFO_VERSION 1
+#define IOCPT_IFNAMSIZ 16
+
+/**
+ * enum iocpt_cmd_opcode - Device commands
+ */
+enum iocpt_cmd_opcode {
+   IOCPT_CMD_NOP   = 0,/* D, A */
+
+   /* Device commands */
+   IOCPT_CMD_IDENTIFY  = 1,/* D */
+   IOCPT_CMD_RESET = 3,/* D */
+
+   /* LIF commands */
+   IOCPT_CMD_LIF_IDENTIFY  = 20,   /* D */
+   IOCPT_CMD_LIF_INIT  = 21,   /* D */
+   IOCPT_CMD_LIF_RESET = 22,   /* D */
+   IOCPT_CMD_LIF_GETATTR   = 23,   /* D, A */
+   IOCPT_CMD_LIF_SETATTR   = 24,   /* D, A */
+
+   /* Queue commands */
+   IOCPT_CMD_Q_IDENTIFY= 39,   /* D, A */
+   IOCPT_CMD_Q_INIT= 40,   /* D, A */
+   IOCPT_CMD_Q_CONTROL = 41,   /* D, A */
+
+   /* Session commands */
+   IOCPT_CMD_SESS_CONTROL  = 45,   /* D, A */
+};
+
+/**
+ * enum iocpt_status_code - Device command return codes
+ */
+enum iocpt_status_code {
+   IOCPT_RC_SUCCESS= 0,/* Success */
+   IOCPT_RC_EVERSION   = 1,/* Incorrect version for request */
+   IOCPT_RC_EOPCODE= 2,/* Invalid cmd opcode */
+   IOCPT_RC_EIO= 3,/* I/O error */
+   IOCPT_RC_EPERM  = 4,/* Permission denied */
+   IOCPT_RC_EQID   = 5,/* Bad qid */
+   IOCPT_RC_EQTYPE = 6,/* Bad qtype */
+   IOCPT_RC_ENOENT = 7,/* No such element */
+   IOCPT_RC_EINTR  = 8,/* Operation interrupted */
+   IOCPT_RC_EAGAIN = 9,/* Try again */
+   IOCPT_RC_ENOMEM = 10,   /* Out of memory */
+   IOCPT_RC_EFAULT = 11,   /* Bad address */
+   IOCPT_RC_EBUSY  = 12,   /* Device or resource busy */
+   IOCPT_RC_EEXIST = 13,   /* Object already exists */
+   IOCPT_RC_EINVAL = 14,   /* Invalid argument */
+   IOCPT_RC_ENOSPC = 15,   /* No space left or alloc failure */
+   IOCPT_RC_ERANGE = 16,   /* Parameter out of range */
+   IOCPT_RC_BAD_ADDR   = 17,   /* Descriptor contains a bad ptr */
+   IOCPT_RC_DEV_CMD= 18,   /* Device cmd attempted on AdminQ */
+   IOCPT_RC_ENOSUPP= 19,   /* Operation not supported */
+   IOCPT_RC_ERROR  = 29,   /* Generic error */
+};
+
+enum iocpt_notifyq_opcode {
+   IOCPT_EVENT_RESET   = 1,
+   IOCPT_EVENT_HEARTBEAT   = 2,
+   IOCPT_EVENT_LOG = 3,
+};
+
+enum iocpt_lif_type {
+   IOCPT_LIF_TYPE_DEFAULT = 0,
+};
+
+/**
+ * struct iocpt_admin_cmd - General admin command format
+ * @opcode:Opcode for the command
+ * @lif_index: LIF index
+ * @cmd_data:  Opcode-specific command bytes
+ */
+struct iocpt_admin_cmd {
+   u8 opcode;
+   u8 rsvd;
+   __le16 lif_index;
+   u8 cmd_data[60];
+};
+
+/**
+ * struct iocpt_admin_comp - General admin command completion format
+ * @status: Status of the command (enum iocpt_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @cmd_data:   Command-specific bytes
+ * @color:  Color bit (Always 0 for commands issued to the
+ *  Device Cmd Registers)
+ */
+struct iocpt_admin_comp {
+   u8 status;
+   u8 rsvd;
+   __le16 comp_index;
+   u8 cmd_data[11];
+   u8 color;
+#define IOCPT_COMP_COLOR_MASK  0x80
+};
+
+static inline u8 iocpt_color_match(u8 color, u8 done_color)
+{
+   return (!!(color & IOCPT_COMP_COLOR_MASK)) == done_color;
+}
+
+/**
+ * struct iocpt_nop_cmd - NOP command
+ * @opcode:Opcode
+ */
+struct iocpt_nop_cmd {
+   u8 opcode;
+   u8 rsvd[63];
+};
+
+/**
+ * struct iocpt_nop_comp - NOP command completion
+ * @status:Status of the command (enum iocpt_status_code)
+ */
+stru

[PATCH 2/6] crypto/ionic: add device and admin command handlers

2024-04-19 Thread Andrew Boyer
This defines the handlers used for device (register-based) and
admin (adminq-based) commands.

Signed-off-by: Andrew Boyer 
---
 drivers/crypto/ionic/ionic_crypto.h  | 210 
 drivers/crypto/ionic/ionic_crypto_cmds.c | 651 +++
 drivers/crypto/ionic/ionic_crypto_main.c |  42 ++
 drivers/crypto/ionic/meson.build |  12 +
 4 files changed, 915 insertions(+)
 create mode 100644 drivers/crypto/ionic/ionic_crypto.h
 create mode 100644 drivers/crypto/ionic/ionic_crypto_cmds.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_main.c
 create mode 100644 drivers/crypto/ionic/meson.build

diff --git a/drivers/crypto/ionic/ionic_crypto.h b/drivers/crypto/ionic/ionic_crypto.h
new file mode 100644
index 00..958e611337
--- /dev/null
+++ b/drivers/crypto/ionic/ionic_crypto.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2021-2024 Advanced Micro Devices, Inc.
+ */
+
+#ifndef _IONIC_CRYPTO_H_
+#define _IONIC_CRYPTO_H_
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "ionic_common.h"
+#include "ionic_crypto_if.h"
+#include "ionic_regs.h"
+
+#define IOCPT_ADMINQ_LENGTH16  /* must be a power of two */
+
+#define IOCPT_CRYPTOQ_WAIT 10  /* 1s */
+
+extern int iocpt_logtype;
+#define RTE_LOGTYPE_IOCPT iocpt_logtype
+
+#define IOCPT_PRINT(level, ...) \
+   RTE_LOG_LINE_PREFIX(level, IOCPT, "%s(): ", __func__, __VA_ARGS__)
+
+#define IOCPT_PRINT_CALL() IOCPT_PRINT(DEBUG, " >>")
+
+struct iocpt_qtype_info {
+   uint8_t  version;
+   uint8_t  supported;
+   uint64_t features;
+   uint16_t desc_sz;
+   uint16_t comp_sz;
+   uint16_t sg_desc_sz;
+   uint16_t max_sg_elems;
+   uint16_t sg_desc_stride;
+};
+
+#define IOCPT_Q_F_INITED   BIT(0)
+#define IOCPT_Q_F_DEFERRED BIT(1)
+#define IOCPT_Q_F_SG   BIT(2)
+
+#define Q_NEXT_TO_POST(_q, _n) (((_q)->head_idx + (_n)) & ((_q)->size_mask))
+#define Q_NEXT_TO_SRVC(_q, _n) (((_q)->tail_idx + (_n)) & ((_q)->size_mask))
+
+#define IOCPT_INFO_SZ(_q)  ((_q)->num_segs * sizeof(void *))
+#define IOCPT_INFO_IDX(_q, _i) ((_i) * (_q)->num_segs)
+#define IOCPT_INFO_PTR(_q, _i) (&(_q)->info[IOCPT_INFO_IDX((_q), _i)])
+
+struct iocpt_queue {
+   uint16_t num_descs;
+   uint16_t num_segs;
+   uint16_t head_idx;
+   uint16_t tail_idx;
+   uint16_t size_mask;
+   uint8_t type;
+   uint8_t hw_type;
+   void *base;
+   void *sg_base;
+   struct ionic_doorbell __iomem *db;
+   void **info;
+
+   uint32_t index;
+   uint32_t hw_index;
+   rte_iova_t base_pa;
+   rte_iova_t sg_base_pa;
+};
+
+struct iocpt_cq {
+   uint16_t tail_idx;
+   uint16_t num_descs;
+   uint16_t size_mask;
+   bool done_color;
+   void *base;
+   rte_iova_t base_pa;
+};
+
+#define IOCPT_COMMON_FIELDS\
+   struct iocpt_queue q;   \
+   struct iocpt_cq cq; \
+   struct iocpt_dev *dev;  \
+   const struct rte_memzone *base_z;   \
+   void *base; \
+   rte_iova_t base_pa
+
+struct iocpt_common_q {
+   IOCPT_COMMON_FIELDS;
+};
+
+struct iocpt_admin_q {
+   IOCPT_COMMON_FIELDS;
+
+   uint16_t flags;
+};
+
+#define IOCPT_DEV_F_INITED BIT(0)
+#define IOCPT_DEV_F_UP BIT(1)
+#define IOCPT_DEV_F_FW_RESET   BIT(2)
+
+/* Combined dev / LIF object */
+struct iocpt_dev {
+   const char *name;
+   char fw_version[IOCPT_FWVERS_BUFLEN];
+   struct iocpt_identity ident;
+
+   void *bus_dev;
+   struct rte_cryptodev *crypto_dev;
+
+   union iocpt_dev_info_regs __iomem *dev_info;
+   union iocpt_dev_cmd_regs __iomem *dev_cmd;
+
+   struct ionic_doorbell __iomem *db_pages;
+   struct ionic_intr __iomem *intr_ctrl;
+
+   uint32_t max_qps;
+   uint32_t max_sessions;
+   uint16_t state;
+   uint8_t driver_id;
+   uint8_t socket_id;
+
+   rte_spinlock_t adminq_lock;
+   rte_spinlock_t adminq_service_lock;
+
+   struct iocpt_admin_q *adminq;
+
+   uint64_t features;
+   uint32_t hw_features;
+
+   uint32_t info_sz;
+   struct iocpt_lif_info *info;
+   rte_iova_t info_pa;
+   const struct rte_memzone *info_z;
+
+   struct iocpt_qtype_info qtype_info[IOCPT_QTYPE_MAX];
+   uint8_t qtype_ver[IOCPT_QTYPE_MAX];
+
+   struct rte_cryptodev_stats stats_base;
+};
+
+/** iocpt_admin_ctx - Admin command context.
+ * @pending_work:  Flag that indicates a completion.
+ * @cmd:   Admin command (64B) to be copied to the queue.
+ * @comp:  Admin completion (16B) copied from the queue.
+ */
+struct iocpt_admin_ctx {
+   bool pending_work;
+   union iocpt_adminq_cmd cmd;

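A note on the Q_NEXT_TO_POST()/Q_NEXT_TO_SRVC() macros in the header above:
they depend on the queue size being a power of two (as the IOCPT_ADMINQ_LENGTH
comment requires), because with size_mask = num_descs - 1 a bitwise AND
replaces the modulo when the ring wraps. A standalone sketch of the idiom:

#include <assert.h>
#include <stdint.h>

static uint16_t
ring_next(uint16_t idx, uint16_t n, uint16_t num_descs)
{
	uint16_t size_mask = num_descs - 1;

	assert((num_descs & size_mask) == 0);	/* power of two */
	return (idx + n) & size_mask;		/* == (idx + n) % num_descs */
}

int
main(void)
{
	assert(ring_next(14, 3, 16) == 1);	/* wraps past the end */
	return 0;
}
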
[PATCH 3/6] common/ionic: add crypto vdev support

2024-04-19 Thread Andrew Boyer
This adds support for cryptodevs to the common ionic library.

Signed-off-by: Andrew Boyer 
---
 drivers/common/ionic/ionic_common.h |  2 ++
 drivers/common/ionic/ionic_common_uio.c | 48 -
 drivers/common/ionic/version.map|  1 +
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/common/ionic/ionic_common.h 
b/drivers/common/ionic/ionic_common.h
index eb4850e24c..c4a15fdf2b 100644
--- a/drivers/common/ionic/ionic_common.h
+++ b/drivers/common/ionic/ionic_common.h
@@ -32,6 +32,8 @@ struct ionic_dev_bar {
 
 __rte_internal
 void ionic_uio_scan_mnet_devices(void);
+__rte_internal
+void ionic_uio_scan_mcrypt_devices(void);
 
 __rte_internal
 void ionic_uio_get_rsrc(const char *name, int idx, struct ionic_dev_bar *bar);
diff --git a/drivers/common/ionic/ionic_common_uio.c 
b/drivers/common/ionic/ionic_common_uio.c
index e5c73faf96..c647b22eaf 100644
--- a/drivers/common/ionic/ionic_common_uio.c
+++ b/drivers/common/ionic/ionic_common_uio.c
@@ -23,10 +23,12 @@
 
 #define IONIC_MDEV_UNK  "mdev_unknown"
 #define IONIC_MNIC  "cpu_mnic"
+#define IONIC_MCRYPT    "cpu_mcrypt"
 
 #define IONIC_MAX_NAME_LEN  20
 #define IONIC_MAX_MNETS 5
-#define IONIC_MAX_DEVICES   (IONIC_MAX_MNETS)
+#define IONIC_MAX_MCPTS 1
+#define IONIC_MAX_DEVICES   (IONIC_MAX_MNETS + IONIC_MAX_MCPTS)
 #define IONIC_MAX_U16_IDX   0xFFFF
 #define IONIC_UIO_MAX_TRIES 32
 
@@ -49,6 +51,7 @@ struct ionic_map_tbl ionic_mdev_map[IONIC_MAX_DEVICES] = {
{ "net_ionic2", 2, IONIC_MAX_U16_IDX, IONIC_MDEV_UNK },
{ "net_ionic3", 3, IONIC_MAX_U16_IDX, IONIC_MDEV_UNK },
{ "net_ionic4", 4, IONIC_MAX_U16_IDX, IONIC_MDEV_UNK },
+   { "crypto_ionic0", 5, IONIC_MAX_U16_IDX, IONIC_MDEV_UNK },
 };
 
 struct uio_name {
@@ -143,6 +146,49 @@ ionic_uio_scan_mnet_devices(void)
}
 }
 
+void
+ionic_uio_scan_mcrypt_devices(void)
+{
+   struct ionic_map_tbl *map;
+   char devname[IONIC_MAX_NAME_LEN];
+   struct uio_name name_cache[IONIC_MAX_DEVICES];
+   bool done;
+   int mdev_idx = 0;
+   int uio_idx;
+   int i;
+   static bool scan_done;
+
+   if (scan_done)
+   return;
+
+   scan_done = true;
+
+   uio_fill_name_cache(name_cache, IONIC_MCRYPT);
+
+   for (i = IONIC_MAX_MNETS; i < IONIC_MAX_DEVICES; i++) {
+   done = false;
+
+   while (!done) {
+   if (mdev_idx > IONIC_MAX_MDEV_SCAN)
+   break;
+
+   /* Look for a matching mcrypt */
+   snprintf(devname, IONIC_MAX_NAME_LEN,
+   IONIC_MCRYPT "%d", mdev_idx);
+   uio_idx = uio_get_idx_for_devname(name_cache, devname);
+   if (uio_idx >= 0) {
+   map = &ionic_mdev_map[i];
+   map->uio_idx = (uint16_t)uio_idx;
+   strlcpy(map->mdev_name, devname,
+   IONIC_MAX_NAME_LEN);
+   done = true;
+   }
+
+   mdev_idx++;
+   }
+   }
+}
+
 static int
 uio_get_multi_dev_uionum(const char *name)
 {
diff --git a/drivers/common/ionic/version.map b/drivers/common/ionic/version.map
index 484330c437..db532d4ffc 100644
--- a/drivers/common/ionic/version.map
+++ b/drivers/common/ionic/version.map
@@ -2,6 +2,7 @@ INTERNAL {
global:
 
ionic_uio_scan_mnet_devices;
+   ionic_uio_scan_mcrypt_devices;
ionic_uio_get_rsrc;
ionic_uio_rel_rsrc;
 
-- 
2.17.1


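For context, the crypto PMD introduced later in this series is the intended
consumer of the new export: it scans once at probe time and then looks up BAR
resources by name. A rough sketch under that assumption (the helper and its
call site are illustrative; only the two prototypes and the "crypto_ionic0"
map entry come from this patch):

#include "ionic_common.h"

static void
probe_mcrypt_bars(struct ionic_dev_bar *bar)
{
	/* Safe to call repeatedly: guarded by the static scan_done flag. */
	ionic_uio_scan_mcrypt_devices();
	ionic_uio_get_rsrc("crypto_ionic0", 0, bar);
}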

[PATCH 4/6] crypto/ionic: add device object and vdev support

2024-04-19 Thread Andrew Boyer
This defines the main device object routines and the vdev
support code. The vdev code uses the common library.

Signed-off-by: Andrew Boyer 
---
 drivers/crypto/ionic/ionic_crypto.h  |  89 
 drivers/crypto/ionic/ionic_crypto_main.c | 538 +++
 drivers/crypto/ionic/ionic_crypto_vdev.c | 128 ++
 drivers/crypto/ionic/meson.build |   2 +
 4 files changed, 757 insertions(+)
 create mode 100644 drivers/crypto/ionic/ionic_crypto_vdev.c

diff --git a/drivers/crypto/ionic/ionic_crypto.h 
b/drivers/crypto/ionic/ionic_crypto.h
index 958e611337..d048f7aa51 100644
--- a/drivers/crypto/ionic/ionic_crypto.h
+++ b/drivers/crypto/ionic/ionic_crypto.h
@@ -20,6 +20,11 @@
 #include "ionic_crypto_if.h"
 #include "ionic_regs.h"
 
+/* Devargs */
+/* NONE */
+
+#define IOCPT_MAX_RING_DESC    32768
+#define IOCPT_MIN_RING_DESC    16
 #define IOCPT_ADMINQ_LENGTH    16  /* must be a power of two */
 
 #define IOCPT_CRYPTOQ_WAIT 10  /* 1s */
@@ -32,6 +37,64 @@ extern int iocpt_logtype;
 
 #define IOCPT_PRINT_CALL() IOCPT_PRINT(DEBUG, " >>")
 
+static inline void iocpt_struct_size_checks(void)
+{
+   RTE_BUILD_BUG_ON(sizeof(struct ionic_doorbell) != 8);
+   RTE_BUILD_BUG_ON(sizeof(struct ionic_intr) != 32);
+   RTE_BUILD_BUG_ON(sizeof(struct ionic_intr_status) != 8);
+
+   RTE_BUILD_BUG_ON(sizeof(union iocpt_dev_regs) != 4096);
+   RTE_BUILD_BUG_ON(sizeof(union iocpt_dev_info_regs) != 2048);
+   RTE_BUILD_BUG_ON(sizeof(union iocpt_dev_cmd_regs) != 2048);
+
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_admin_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_admin_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_nop_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_nop_comp) != 16);
+
+   /* Device commands */
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_dev_identify_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_dev_identify_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_dev_reset_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_dev_reset_comp) != 16);
+
+   /* LIF commands */
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_identify_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_identify_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_init_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_init_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_reset_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_getattr_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_getattr_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_setattr_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_lif_setattr_comp) != 16);
+
+   /* Queue commands */
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_q_identify_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_q_identify_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_q_init_cmd) != 64);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_q_init_comp) != 16);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_q_control_cmd) != 64);
+
+   /* Crypto */
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_crypto_desc) != 32);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_crypto_sg_desc) != 256);
+   RTE_BUILD_BUG_ON(sizeof(struct iocpt_crypto_comp) != 16);
+}
+
+struct iocpt_dev_bars {
+   struct ionic_dev_bar bar[IONIC_BARS_MAX];
+   uint32_t num_bars;
+};
+
+/* Queue watchdog */
+#define IOCPT_Q_WDOG_SESS_IDX  0
+#define IOCPT_Q_WDOG_KEY_LEN   16
+#define IOCPT_Q_WDOG_IV_LEN12
+#define IOCPT_Q_WDOG_PLD_LEN   4
+#define IOCPT_Q_WDOG_TAG_LEN   16
+#define IOCPT_Q_WDOG_OP_TYPE   RTE_CRYPTO_OP_TYPE_UNDEFINED
+
 struct iocpt_qtype_info {
uint8_t  version;
uint8_t  supported;
@@ -108,8 +171,10 @@ struct iocpt_admin_q {
 struct iocpt_dev {
const char *name;
char fw_version[IOCPT_FWVERS_BUFLEN];
+   struct iocpt_dev_bars bars;
struct iocpt_identity ident;
 
+   const struct iocpt_dev_intf *intf;
void *bus_dev;
struct rte_cryptodev *crypto_dev;
 
@@ -130,6 +195,8 @@ struct iocpt_dev {
 
struct iocpt_admin_q *adminq;
 
+   struct rte_bitmap  *sess_bm;/* SET bit indicates index is free */
+
uint64_t features;
uint32_t hw_features;
 
@@ -144,6 +211,20 @@ struct iocpt_dev {
struct rte_cryptodev_stats stats_base;
 };
 
+struct iocpt_dev_intf {
+   int  (*setup_bars)(struct iocpt_dev *dev);
+   void (*unmap_bars)(struct iocpt_dev *dev);
+};
+
+static inline int
+iocpt_setup_bars(struct iocpt_dev *dev)
+{
+   if (dev->intf->setup_bars == NULL)
+   return -EINVAL;
+
+   return (*dev->intf->setup_bars)(dev);
+}
+
 /** iocpt_admin_ctx - Admin command context.
  * @pending_work:  Flag that indicates a completion.
  * @cmd:   Admin command (64B) to be copied to the queue.

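The iocpt_dev_intf table above is the seam between the bus-independent core
and the per-bus files: each backend fills in the callbacks and the core only
ever calls through wrappers like iocpt_setup_bars(). A sketch of how a backend
is expected to wire it up (the callback bodies and the probe helper are
illustrative; only the struct and the wrapper come from this patch):

static int
vdev_setup_bars(struct iocpt_dev *dev)
{
	/* map this bus type's BAR memory into dev->bars ... */
	return 0;
}

static void
vdev_unmap_bars(struct iocpt_dev *dev)
{
	/* undo vdev_setup_bars() ... */
}

static const struct iocpt_dev_intf vdev_intf = {
	.setup_bars = vdev_setup_bars,
	.unmap_bars = vdev_unmap_bars,
};

static int
vdev_probe(struct iocpt_dev *dev)
{
	dev->intf = &vdev_intf;
	return iocpt_setup_bars(dev);	/* dispatches through the table */
}
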
[PATCH 6/6] crypto/ionic: add documentation and connect to build

2024-04-19 Thread Andrew Boyer
Add a features list and guide for the ionic crypto PMD.
Hook the new PMD up to the build.

Signed-off-by: Andrew Boyer 
---
 MAINTAINERS  |  7 +
 doc/guides/cryptodevs/features/ionic.ini | 40 
 doc/guides/cryptodevs/index.rst  |  1 +
 doc/guides/cryptodevs/ionic.rst  | 39 +++
 drivers/crypto/meson.build   |  1 +
 5 files changed, 88 insertions(+)
 create mode 100644 doc/guides/cryptodevs/features/ionic.ini
 create mode 100644 doc/guides/cryptodevs/ionic.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 7abb3aee49..7cf999371c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1075,6 +1075,13 @@ F: drivers/crypto/ccp/
 F: doc/guides/cryptodevs/ccp.rst
 F: doc/guides/cryptodevs/features/ccp.ini
 
+AMD Pensando ionic crypto
+M: Andrew Boyer 
+F: drivers/crypto/ionic/
+F: drivers/common/ionic/
+F: doc/guides/cryptodevs/ionic.rst
+F: doc/guides/cryptodevs/features/ionic.ini
+
 ARMv8 Crypto
 M: Ruifeng Wang 
 F: drivers/crypto/armv8/
diff --git a/doc/guides/cryptodevs/features/ionic.ini 
b/doc/guides/cryptodevs/features/ionic.ini
new file mode 100644
index 00..d3e00bd795
--- /dev/null
+++ b/doc/guides/cryptodevs/features/ionic.ini
@@ -0,0 +1,40 @@
+;
+; Supported features of the 'ionic' crypto driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Symmetric crypto   = Y
+HW Accelerated = Y
+In Place SGL   = Y
+OOP SGL In LB  Out = Y
+OOP SGL In SGL Out = Y
+OOP LB  In LB  Out = Y
+
+;
+; Supported crypto algorithms of 'ionic' crypto driver.
+;
+[Cipher]
+
+;
+; Supported authentication algorithms of 'ionic' crypto driver.
+;
+[Auth]
+
+;
+; Supported AEAD algorithms of 'ionic' crypto driver.
+;
+[AEAD]
+AES GCM (128) = Y
+AES GCM (256) = Y
+
+;
+; Supported Asymmetric algorithms of the 'ionic' crypto driver.
+;
+[Asymmetric]
+
+;
+; Supported Operating systems of the 'ionic' crypto driver.
+;
+[OS]
+Linux = Y
diff --git a/doc/guides/cryptodevs/index.rst b/doc/guides/cryptodevs/index.rst
index cb4ce227e9..1e57a9fe86 100644
--- a/doc/guides/cryptodevs/index.rst
+++ b/doc/guides/cryptodevs/index.rst
@@ -20,6 +20,7 @@ Crypto Device Drivers
 cnxk
 dpaa2_sec
 dpaa_sec
+ionic
 kasumi
 octeontx
 openssl
diff --git a/doc/guides/cryptodevs/ionic.rst b/doc/guides/cryptodevs/ionic.rst
new file mode 100644
index 00..9d557f7cc2
--- /dev/null
+++ b/doc/guides/cryptodevs/ionic.rst
@@ -0,0 +1,39 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright 2021-2024 Advanced Micro Devices, Inc.
+
+IONIC Crypto Driver
+===================
+
+The ionic crypto driver provides support for offloading cryptographic operations
+to hardware cryptographic blocks on AMD Pensando server adapters.
+It currently supports the following models:
+
+- DSC-25 dual-port 25G Distributed Services Card `(pdf) `__
+- DSC-100 dual-port 100G Distributed Services Card `(pdf) `__
+- DSC-200 dual-port 200G Distributed Services Card `(pdf) `__
+
+Please visit the AMD Pensando web site at https://www.amd.com/en/accelerators/pensando for more information.
+
+Device Support
+--
+
+The ionic crypto PMD currently supports running directly on the device's embedded
+processors. It does not yet support host-side access via PCI.
+For help running the PMD, please contact AMD Pensando support.
+
+Runtime Configuration
+-
+
+None
+
+Features
+
+--------
+The ionic crypto PMD has support for:
+
+Symmetric Crypto Algorithms
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+AEAD algorithms:
+
+* ``RTE_CRYPTO_AEAD_AES_GCM``
diff --git a/drivers/crypto/meson.build b/drivers/crypto/meson.build
index ee5377deff..e799861bb6 100644
--- a/drivers/crypto/meson.build
+++ b/drivers/crypto/meson.build
@@ -10,6 +10,7 @@ drivers = [
 'cnxk',
 'dpaa_sec',
 'dpaa2_sec',
+'ionic',
 'ipsec_mb',
 'mlx5',
 'mvsam',
-- 
2.17.1



[PATCH 5/6] crypto/ionic: add datapath and capabilities support

2024-04-19 Thread Andrew Boyer
This defines the main datapath and reports the device
capabilities to the stack.

Signed-off-by: Andrew Boyer 
---
 drivers/crypto/ionic/ionic_crypto.h  |  62 +++
 drivers/crypto/ionic/ionic_crypto_caps.c |  55 ++
 drivers/crypto/ionic/ionic_crypto_main.c | 417 +++-
 drivers/crypto/ionic/ionic_crypto_ops.c  | 606 +++
 drivers/crypto/ionic/meson.build |   2 +
 5 files changed, 1140 insertions(+), 2 deletions(-)
 create mode 100644 drivers/crypto/ionic/ionic_crypto_caps.c
 create mode 100644 drivers/crypto/ionic/ionic_crypto_ops.c

diff --git a/drivers/crypto/ionic/ionic_crypto.h 
b/drivers/crypto/ionic/ionic_crypto.h
index d048f7aa51..db87ea0490 100644
--- a/drivers/crypto/ionic/ionic_crypto.h
+++ b/drivers/crypto/ionic/ionic_crypto.h
@@ -37,6 +37,8 @@ extern int iocpt_logtype;
 
 #define IOCPT_PRINT_CALL() IOCPT_PRINT(DEBUG, " >>")
 
+const struct rte_cryptodev_capabilities *iocpt_get_caps(uint64_t flags);
+
 static inline void iocpt_struct_size_checks(void)
 {
RTE_BUILD_BUG_ON(sizeof(struct ionic_doorbell) != 8);
@@ -163,6 +165,50 @@ struct iocpt_admin_q {
uint16_t flags;
 };
 
+struct iocpt_crypto_q {
+   /* cacheline0, cacheline1 */
+   IOCPT_COMMON_FIELDS;
+
+   /* cacheline2 */
+   uint64_t last_wdog_cycles;
+   uint16_t flags;
+
+   /* cacheline3 */
+   struct rte_cryptodev_stats stats;
+
+   uint64_t enqueued_wdogs;
+   uint64_t dequeued_wdogs;
+   uint8_t wdog_iv[IOCPT_Q_WDOG_IV_LEN];
+   uint8_t wdog_pld[IOCPT_Q_WDOG_PLD_LEN];
+   uint8_t wdog_tag[IOCPT_Q_WDOG_TAG_LEN];
+};
+
+#define IOCPT_S_F_INITED   BIT(0)
+
+struct iocpt_session_priv {
+   struct iocpt_dev *dev;
+
+   uint32_t index;
+
+   uint16_t iv_offset;
+   uint16_t iv_length;
+   uint16_t digest_length;
+   uint16_t aad_length;
+
+   uint8_t flags;
+   uint8_t op;
+   uint8_t type;
+
+   uint16_t key_len;
+   uint8_t key[IOCPT_SESS_KEY_LEN_MAX_SYMM];
+};
+
+static inline uint32_t
+iocpt_session_size(void)
+{
+   return sizeof(struct iocpt_session_priv);
+}
+
 #define IOCPT_DEV_F_INITED BIT(0)
 #define IOCPT_DEV_F_UP BIT(1)
 #define IOCPT_DEV_F_FW_RESET   BIT(2)
@@ -194,6 +240,7 @@ struct iocpt_dev {
rte_spinlock_t adminq_service_lock;
 
struct iocpt_admin_q *adminq;
+   struct iocpt_crypto_q **cryptoqs;
 
struct rte_bitmap  *sess_bm;/* SET bit indicates index is free */
 
@@ -242,6 +289,9 @@ int iocpt_probe(void *bus_dev, struct rte_device *rte_dev,
 int iocpt_remove(struct rte_device *rte_dev);
 
 void iocpt_configure(struct iocpt_dev *dev);
+int iocpt_assign_ops(struct rte_cryptodev *cdev);
+int iocpt_start(struct iocpt_dev *dev);
+void iocpt_stop(struct iocpt_dev *dev);
 void iocpt_deinit(struct iocpt_dev *dev);
 
 int iocpt_dev_identify(struct iocpt_dev *dev);
@@ -251,6 +301,14 @@ void iocpt_dev_reset(struct iocpt_dev *dev);
 
 int iocpt_adminq_post_wait(struct iocpt_dev *dev, struct iocpt_admin_ctx *ctx);
 
+int iocpt_cryptoq_alloc(struct iocpt_dev *dev, uint32_t socket_id,
+   uint32_t index, uint16_t ndescs);
+void iocpt_cryptoq_free(struct iocpt_crypto_q *cptq);
+
+int iocpt_session_init(struct iocpt_session_priv *priv);
+int iocpt_session_update(struct iocpt_session_priv *priv);
+void iocpt_session_deinit(struct iocpt_session_priv *priv);
+
 struct ionic_doorbell __iomem *iocpt_db_map(struct iocpt_dev *dev,
struct iocpt_queue *q);
 
@@ -259,6 +317,10 @@ typedef bool (*iocpt_cq_cb)(struct iocpt_cq *cq, uint16_t 
cq_desc_index,
 uint32_t iocpt_cq_service(struct iocpt_cq *cq, uint32_t work_to_do,
iocpt_cq_cb cb, void *cb_arg);
 
+void iocpt_get_stats(const struct iocpt_dev *dev,
+   struct rte_cryptodev_stats *stats);
+void iocpt_reset_stats(struct iocpt_dev *dev);
+
 static inline uint16_t
 iocpt_q_space_avail(struct iocpt_queue *q)
 {
diff --git a/drivers/crypto/ionic/ionic_crypto_caps.c 
b/drivers/crypto/ionic/ionic_crypto_caps.c
new file mode 100644
index 00..da5a69be3d
--- /dev/null
+++ b/drivers/crypto/ionic/ionic_crypto_caps.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2021-2024 Advanced Micro Devices, Inc.
+ */
+
+#include 
+
+#include "ionic_crypto.h"
+
+static const struct rte_cryptodev_capabilities iocpt_sym_caps[] = {
+   {   /* AES GCM */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,
+   {.aead = {
+   .algo = RTE_CRYPTO_AEAD_AES_GCM,
+   .block_size = 16,
+   .key_size = {
+   .min = 16,
+   .max = 32,
+   .increment = 16
+   },
+   .digest_size = {
+   

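The capability entry above advertises key_size {min 16, max 32, increment 16},
i.e. AES-128-GCM and AES-256-GCM but not AES-192. An application can query this
at runtime through the standard cryptodev API; a sketch, assuming the usual
return convention of the capability check helpers (0 when the parameters fit
the advertised ranges):

#include <rte_cryptodev.h>

static int
supports_aes256_gcm(uint8_t dev_id)
{
	const struct rte_cryptodev_symmetric_capability *cap;
	struct rte_cryptodev_sym_capability_idx idx = {
		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
		.algo.aead = RTE_CRYPTO_AEAD_AES_GCM,
	};

	cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
	if (cap == NULL)
		return 0;
	/* 32B key, 16B digest, 0B AAD, 12B IV */
	return rte_cryptodev_sym_capability_check_aead(cap, 32, 16, 0, 12) == 0;
}
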
[PATCH] net/ionic: clean up logging issues

2024-04-19 Thread Andrew Boyer
Switch to the new RTE_LOG_LINE_PREFIX logging macro. While here, fix up
some trailing-newline issues reported by the new macro.

Signed-off-by: Andrew Boyer 
---
 drivers/net/ionic/ionic_dev_pci.c |  7 +++
 drivers/net/ionic/ionic_lif.c |  8 
 drivers/net/ionic/ionic_logs.h| 14 +++---
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ionic/ionic_dev_pci.c 
b/drivers/net/ionic/ionic_dev_pci.c
index 2d7b4f223e..76235cad51 100644
--- a/drivers/net/ionic/ionic_dev_pci.c
+++ b/drivers/net/ionic/ionic_dev_pci.c
@@ -43,13 +43,12 @@ ionic_pci_setup(struct ionic_adapter *adapter)
 
/* BAR0: dev_cmd and interrupts */
if (num_bars < 1) {
-   IONIC_PRINT(ERR, "No bars found, aborting\n");
+   IONIC_PRINT(ERR, "No bars found, aborting");
return -EFAULT;
}
 
if (bar->len < IONIC_BAR0_SIZE) {
-   IONIC_PRINT(ERR,
-   "Resource bar size %lu too small, aborting\n",
+   IONIC_PRINT(ERR, "Resource bar size %lu too small, aborting",
bar->len);
return -EFAULT;
}
@@ -84,7 +83,7 @@ ionic_pci_setup(struct ionic_adapter *adapter)
/* BAR1: doorbells */
bar++;
if (num_bars < IONIC_BARS_MIN) {
-   IONIC_PRINT(ERR, "Doorbell bar missing, aborting\n");
+   IONIC_PRINT(ERR, "Doorbell bar missing, aborting");
return -EFAULT;
}
 
diff --git a/drivers/net/ionic/ionic_lif.c b/drivers/net/ionic/ionic_lif.c
index 7f02b67610..b4dc118fef 100644
--- a/drivers/net/ionic/ionic_lif.c
+++ b/drivers/net/ionic/ionic_lif.c
@@ -990,13 +990,13 @@ ionic_lif_queue_identify(struct ionic_lif *lif)
qtype, ionic_qtype_vers[qtype]);
err = ionic_dev_cmd_wait_check(idev, IONIC_DEVCMD_TIMEOUT);
if (err == -EINVAL) {
-   IONIC_PRINT(ERR, "qtype %d not supported\n", qtype);
+   IONIC_PRINT(ERR, "qtype %d not supported", qtype);
continue;
} else if (err == -EIO) {
-   IONIC_PRINT(ERR, "q_ident failed, older FW\n");
+   IONIC_PRINT(ERR, "q_ident failed, older FW");
return;
} else if (err) {
-   IONIC_PRINT(ERR, "q_ident failed, qtype %d: %d\n",
+   IONIC_PRINT(ERR, "q_ident failed, qtype %d: %d",
qtype, err);
return;
}
@@ -1380,7 +1380,7 @@ ionic_lif_handle_fw_down(struct ionic_lif *lif)
 
if (lif->state & IONIC_LIF_F_UP) {
IONIC_PRINT(NOTICE,
-   "Surprise FW stop, stopping %s\n", lif->name);
+   "Surprise FW stop, stopping %s", lif->name);
ionic_lif_stop(lif);
}
 
diff --git a/drivers/net/ionic/ionic_logs.h b/drivers/net/ionic/ionic_logs.h
index c10b06c051..739de00af8 100644
--- a/drivers/net/ionic/ionic_logs.h
+++ b/drivers/net/ionic/ionic_logs.h
@@ -8,19 +8,11 @@
 #include 
 
 extern int ionic_logtype;
+#define RTE_LOGTYPE_IONIC ionic_logtype
 
-#define IONIC_PRINT(level, fmt, args...) rte_log(RTE_LOG_ ## level, \
-   ionic_logtype, "%s(): " fmt "\n", __func__, ##args)
+#define IONIC_PRINT(level, ...) \
+   RTE_LOG_LINE_PREFIX(level, IONIC, "%s(): ", __func__, __VA_ARGS__)
 
 #define IONIC_PRINT_CALL() IONIC_PRINT(DEBUG, " >>")
 
-#ifndef IONIC_WARN_ON
-#define IONIC_WARN_ON(x) do { \
-   int ret = !!(x); \
-   if (unlikely(ret)) \
-   IONIC_PRINT(WARNING, "WARN_ON: \"" #x "\" at %s:%d\n", \
-   __func__, __LINE__); \
-} while (0)
-#endif
-
 #endif /* _IONIC_LOGS_H_ */
-- 
2.17.1


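A note on the mechanics, since the same conversion appears in the crypto series
above: RTE_LOG_LINE_PREFIX() terminates the log line itself, which is why every
trailing "\n" is dropped here. Roughly (a sketch, not the exact macro
expansion):

/* Before: IONIC_PRINT(ERR, "No bars found, aborting\n");
 *   -> rte_log(RTE_LOG_ERR, ionic_logtype,
 *              "%s(): No bars found, aborting\n\n", __func__)
 *      (double newline: one from the caller, one appended by the macro)
 * After:  IONIC_PRINT(ERR, "No bars found, aborting");
 *   -> one clean line; the newline is supplied by RTE_LOG_LINE_PREFIX()
 */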

Re: [PATCH] net/ionic: clean up logging issues

2024-04-19 Thread Stephen Hemminger
On Fri, 19 Apr 2024 13:25:56 -0700
Andrew Boyer  wrote:

> Switch to the new RTE_LOG_LINE_PREFIX logging macro. While here, fix up
> some trailing-newline issues reported by the new macro.
> 
> Signed-off-by: Andrew Boyer 

Acked-by: Stephen Hemminger 


Re: [PATCH v4 3/6] latencystats: do not use floating point

2024-04-19 Thread Stephen Hemminger
On Fri, 19 Apr 2024 20:49:56 +0200
Morten Brørup  wrote:

> > -   /*
> > -* The average latency is measured using exponential moving
> > -* average, i.e. using EWMA
> > -* https://en.wikipedia.org/wiki/Moving_average
> > -*/
> > -   glob_stats->avg_latency +=
> > -   alpha * (latency - glob_stats->avg_latency);
> > +   glob_stats->avg_latency = latency;
> > +   glob_stats->jitter = latency / 2;  
> 
> Setting jitter at first sample as latency / 2 is wrong.
> Jitter should remain zero at first sample.

I chose that because it is what the TCP RFC does.
RFC 6298

   (2.2) When the first RTT measurement R is made, the host MUST set

SRTT <- R
RTTVAR <- R/2
RTO <- SRTT + max (G, K*RTTVAR)

The problem is that the smoothing constant in this code is quite small.
Also, the TCP RFC has the following; not sure if it matters.

   (2.3) When a subsequent RTT measurement R' is made, a host MUST set

RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R'|
SRTT <- (1 - alpha) * SRTT + alpha * R'

 The value of SRTT used in the update to RTTVAR is its value
 before updating SRTT itself using the second assignment.  That
 is, updating RTTVAR and SRTT MUST be computed in the above
 order.

 The above SHOULD be computed using alpha=1/8 and beta=1/4 (as
 suggested in [JK88]).

 After the computation, a host MUST update
 RTO <- SRTT + max (G, K*RTTVAR)


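For reference, the RFC 6298 arithmetic quoted above maps cleanly onto integers,
which is the point of this series: with alpha = 1/8 and beta = 1/4 the updates
become plain divides (or shifts). A sketch (names are illustrative, not the
latencystats fields; note the RFC requires RTTVAR to be updated before SRTT):

#include <stdbool.h>
#include <stdint.h>

static int64_t srtt;	/* smoothed latency, cf. avg_latency */
static int64_t rttvar;	/* variation, cf. jitter */

static void
record_sample(int64_t r, bool first)
{
	int64_t err;

	if (first) {		/* the (2.2) rule quoted above */
		srtt = r;
		rttvar = r / 2;
		return;
	}
	err = srtt > r ? srtt - r : r - srtt;	/* |SRTT - R'| */
	rttvar += (err - rttvar) / 4;		/* beta = 1/4 */
	srtt += (r - srtt) / 8;			/* alpha = 1/8 */
}
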
[PATCH v4 00/45] use stdatomic API

2024-04-19 Thread Tyler Retzlaff
This series converts all non-generic builtin atomics to use the rte_atomic
macros that allow optional enablement of standard C11 atomics.

Use of generic atomics for non-scalar types is not converted in this
change and will be evaluated as a part of a separate series.

Note if this series ends up requiring too much rebasing due to tree
churn before it is merged, I will break it up into smaller series.
v4:
  * rebase after merge of move alignment attribute on types for MSVC,
no other changes.

v3:
  * event/dsw, wrap all lines <= 80 chars, align arguments to
opening parenthesis.
  * event/dlb2, wrap changed lines <= 80 chars, remove comments
referencing gcc __atomic built-ins.
  * bus/vmbus, remove comment referencing gcc atomic built-ins,
fix mistake where monitor_mask was declared RTE_ATOMIC(uint32_t),
fix mistake where pending was not declared RTE_ATOMIC(uint32_t),
remove now unnecessary cast to __rte_atomic of pending (since
the field is now properly declare RTE_ATOMIC).

v2:
  * drop the net/sfc driver from the series. the sfc driver
uses generic __atomic_store not handled by the current macros.
the cases where generic __atomic_xxx are used on objects that
can't be accepted by __atomic_xxx_n will be addressed in a
separate series.

Tyler Retzlaff (45):
  net/mlx5: use rte stdatomic API
  net/ixgbe: use rte stdatomic API
  net/iavf: use rte stdatomic API
  net/ice: use rte stdatomic API
  net/i40e: use rte stdatomic API
  net/hns3: use rte stdatomic API
  net/bnxt: use rte stdatomic API
  net/cpfl: use rte stdatomic API
  net/af_xdp: use rte stdatomic API
  net/octeon_ep: use rte stdatomic API
  net/octeontx: use rte stdatomic API
  net/cxgbe: use rte stdatomic API
  net/gve: use rte stdatomic API
  net/memif: use rte stdatomic API
  net/thunderx: use rte stdatomic API
  net/virtio: use rte stdatomic API
  net/hinic: use rte stdatomic API
  net/idpf: use rte stdatomic API
  net/qede: use rte stdatomic API
  net/ring: use rte stdatomic API
  vdpa/mlx5: use rte stdatomic API
  raw/ifpga: use rte stdatomic API
  event/opdl: use rte stdatomic API
  event/octeontx: use rte stdatomic API
  event/dsw: use rte stdatomic API
  dma/skeleton: use rte stdatomic API
  crypto/octeontx: use rte stdatomic API
  common/mlx5: use rte stdatomic API
  common/idpf: use rte stdatomic API
  common/iavf: use rte stdatomic API
  baseband/acc: use rte stdatomic API
  net/txgbe: use rte stdatomic API
  net/null: use rte stdatomic API
  event/dlb2: use rte stdatomic API
  dma/idxd: use rte stdatomic API
  crypto/ccp: use rte stdatomic API
  common/cpt: use rte stdatomic API
  bus/vmbus: use rte stdatomic API
  examples: use rte stdatomic API
  app/dumpcap: use rte stdatomic API
  app/test: use rte stdatomic API
  app/test-eventdev: use rte stdatomic API
  app/test-crypto-perf: use rte stdatomic API
  app/test-compress-perf: use rte stdatomic API
  app/test-bbdev: use rte stdatomic API

 app/dumpcap/main.c |  12 +-
 app/test-bbdev/test_bbdev_perf.c   | 183 +
 app/test-compress-perf/comp_perf_test_common.h |   2 +-
 app/test-compress-perf/comp_perf_test_cyclecount.c |   4 +-
 app/test-compress-perf/comp_perf_test_throughput.c |  10 +-
 app/test-compress-perf/comp_perf_test_verify.c |   6 +-
 app/test-crypto-perf/cperf_test_latency.c  |   6 +-
 app/test-crypto-perf/cperf_test_pmd_cyclecount.c   |  10 +-
 app/test-crypto-perf/cperf_test_throughput.c   |  10 +-
 app/test-crypto-perf/cperf_test_verify.c   |  10 +-
 app/test-eventdev/test_order_atq.c |   4 +-
 app/test-eventdev/test_order_common.c  |   5 +-
 app/test-eventdev/test_order_common.h  |   8 +-
 app/test-eventdev/test_order_queue.c   |   4 +-
 app/test-eventdev/test_perf_common.h   |   6 +-
 app/test/test_bpf.c|  46 --
 app/test/test_distributor.c| 114 ++---
 app/test/test_distributor_perf.c   |   4 +-
 app/test/test_func_reentrancy.c|  28 ++--
 app/test/test_hash_multiwriter.c   |  16 +-
 app/test/test_hash_readwrite.c |  74 -
 app/test/test_hash_readwrite_lf_perf.c |  88 +-
 app/test/test_lcores.c |  25 +--
 app/test/test_lpm_perf.c   |  14 +-
 app/test/test_mcslock.c|  12 +-
 app/test/test_mempool_perf.c   |   9 +-
 app/test/test_pflock.c |  13 +-
 app/test/test_pmd_perf.c   |  10 +-
 app/test/test_rcu_qsbr_perf.c  | 114 ++---
 app/test/test_ring_perf.c  |  11 +-
 app/test/test_ring_stress_impl.h   |  10 +-
 app/test/test_rwlock.c |   9 +-
 app/test/test_seqlock.c  

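The general shape of every conversion in this series, as a standalone sketch:
the object is declared with RTE_ATOMIC() so it becomes a C11 _Atomic when
RTE_ENABLE_STDATOMIC is enabled, and each gcc builtin is replaced by the
rte_atomic_*_explicit equivalent taking a rte_memory_order_* argument.

#include <stdint.h>
#include <rte_stdatomic.h>

/* before:
 *	static uint64_t next_id;
 *	__atomic_fetch_add(&next_id, 1, __ATOMIC_RELAXED);
 * after:
 */
static RTE_ATOMIC(uint64_t) next_id;

static uint64_t
alloc_id(void)
{
	return rte_atomic_fetch_add_explicit(&next_id, 1,
			rte_memory_order_relaxed);
}
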
[PATCH v4 02/45] net/ixgbe: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 14 --
 drivers/net/ixgbe/ixgbe_ethdev.h |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx.c   |  4 ++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index c61c52b..e63ae1a 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1130,7 +1130,7 @@ struct rte_ixgbe_xstats_name_off {
}
 
/* NOTE: review for potential ordering optimization */
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
ixgbe_parse_devargs(eth_dev->data->dev_private,
pci_dev->device.devargs);
rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1638,7 +1638,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev 
*eth_dev)
}
 
/* NOTE: review for potential ordering optimization */
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
ixgbevf_parse_devargs(eth_dev->data->dev_private,
  pci_dev->device.devargs);
 
@@ -4203,7 +4203,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused 
struct rte_eth_dev *dev,
uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
/* NOTE: review for potential ordering optimization */
-   while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
+   while (rte_atomic_load_explicit(&ad->link_thread_running, rte_memory_order_seq_cst)) {
msec_delay(1);
timeout--;
 
@@ -4240,7 +4240,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused 
struct rte_eth_dev *dev,
 
intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
/* NOTE: review for potential ordering optimization */
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
return 0;
 }
 
@@ -4336,7 +4336,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused 
struct rte_eth_dev *dev,
if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
ixgbe_dev_wait_setup_link_complete(dev, 0);
/* NOTE: review for potential ordering optimization */
-   if (!__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
+   if (!rte_atomic_exchange_explicit(&ad->link_thread_running, 1,
+   rte_memory_order_seq_cst)) {
/* To avoid race condition between threads, set
 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 * when there is no link thread running.
@@ -4348,7 +4349,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused 
struct rte_eth_dev *dev,
PMD_DRV_LOG(ERR,
"Create link thread failed!");
/* NOTE: review for potential ordering 
optimization */
-   
__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   
rte_atomic_store_explicit(&ad->link_thread_running, 0,
+   rte_memory_order_seq_cst);
}
} else {
PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 22fc3be..8ad841e 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -511,7 +511,7 @@ struct ixgbe_adapter {
 */
uint8_t pflink_fullchk;
uint8_t mac_ctrl_frame_fwd;
-   bool link_thread_running;
+   RTE_ATOMIC(bool) link_thread_running;
rte_thread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 3d39eaa..0d42fd8 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1831,7 +1831,7 @@ const alignas(RTE_CACHE_LINE_SIZE) uint32_t
 * Use acquire fence to ensure that status_error which includes
 * DD bit is loaded before loading of other descriptor words.
 */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
rxd = *rxdp;
 
@@ -2114,7 +2114,7 @@ const alignas(RTE_CACHE_LINE_SIZE) uint32_t
 * Use acquire fence to ensure that status_error which includes

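One subtlety in the hunks above: __atomic_test_and_set() has no direct
rte_atomic_* spelling, so the patch substitutes an exchange. Both return the
previous value, so "we may start the link thread" is still signalled by
reading back 0. A standalone sketch of the pattern:

#include <stdbool.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(bool) link_thread_running;

/* Returns true if another thread already claimed the flag. */
static bool
claim_link_thread(void)
{
	return rte_atomic_exchange_explicit(&link_thread_running, true,
			rte_memory_order_seq_cst);
}
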
[PATCH v4 01/45] net/mlx5: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |   6 +-
 drivers/net/mlx5/linux/mlx5_verbs.c |   9 ++-
 drivers/net/mlx5/mlx5.c |   9 ++-
 drivers/net/mlx5/mlx5.h |  66 -
 drivers/net/mlx5/mlx5_flow.c|  37 +-
 drivers/net/mlx5/mlx5_flow.h|   8 +-
 drivers/net/mlx5/mlx5_flow_aso.c|  43 ++-
 drivers/net/mlx5/mlx5_flow_dv.c | 126 
 drivers/net/mlx5/mlx5_flow_flex.c   |  14 ++--
 drivers/net/mlx5/mlx5_flow_hw.c |  61 +---
 drivers/net/mlx5/mlx5_flow_meter.c  |  30 
 drivers/net/mlx5/mlx5_flow_quota.c  |  32 
 drivers/net/mlx5/mlx5_hws_cnt.c |  71 +-
 drivers/net/mlx5/mlx5_hws_cnt.h |  10 +--
 drivers/net/mlx5/mlx5_rx.h  |  14 ++--
 drivers/net/mlx5/mlx5_rxq.c |  30 
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_tx.h  |  18 ++---
 drivers/net/mlx5/mlx5_txpp.c|  84 ++---
 drivers/net/mlx5/mlx5_txq.c |  12 +--
 drivers/net/mlx5/mlx5_utils.c   |  10 +--
 drivers/net/mlx5/mlx5_utils.h   |   4 +-
 22 files changed, 351 insertions(+), 345 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c 
b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index 40ea9d2..70bba6c 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -1918,9 +1918,9 @@ int mlx5_txpp_map_hca_bar(struct rte_eth_dev *dev)
return -ENOTSUP;
}
/* Check there is no concurrent mapping in other thread. */
-   if (!__atomic_compare_exchange_n(&ppriv->hca_bar, &expected,
-base, false,
-__ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (!rte_atomic_compare_exchange_strong_explicit(&ppriv->hca_bar, &expected,
+base,
+rte_memory_order_relaxed, rte_memory_order_relaxed))
rte_mem_unmap(base, MLX5_ST_SZ_BYTES(initial_seg));
return 0;
 }
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c 
b/drivers/net/mlx5/linux/mlx5_verbs.c
index b54f3cc..63da8f4 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -1117,7 +1117,7 @@
return 0;
}
/* Only need to check refcnt, 0 after "sh" is allocated. */
-   if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
+   if (!!(rte_atomic_fetch_add_explicit(&sh->self_lb.refcnt, 1, rte_memory_order_relaxed))) {
MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
priv->lb_used = 1;
return 0;
@@ -1163,7 +1163,7 @@
claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
sh->self_lb.ibv_cq = NULL;
}
-   __atomic_fetch_sub(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(&sh->self_lb.refcnt, 1, rte_memory_order_relaxed);
return -rte_errno;
 #else
RTE_SET_USED(dev);
@@ -1186,8 +1186,9 @@
 
if (!priv->lb_used)
return;
-   MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED));
-   if (!(__atomic_fetch_sub(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED) - 1)) {
+   MLX5_ASSERT(rte_atomic_load_explicit(&sh->self_lb.refcnt, rte_memory_order_relaxed));
+   if (!(rte_atomic_fetch_sub_explicit(&sh->self_lb.refcnt, 1,
+   rte_memory_order_relaxed) - 1)) {
if (sh->self_lb.qp) {
claim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp));
sh->self_lb.qp = NULL;
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d1a6382..2ff94db 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -855,8 +855,8 @@
ct_pool = mng->pools[idx];
for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
ct = &ct_pool->actions[i];
-   val = __atomic_fetch_sub(&ct->refcnt, 1,
-__ATOMIC_RELAXED);
+   val = rte_atomic_fetch_sub_explicit(&ct->refcnt, 1,
+rte_memory_order_relaxed);
MLX5_ASSERT(val == 1);
if (val > 1)
cnt++;
@@ -1082,7 +1082,8 @@
DRV_LOG(ERR, "Dynamic flex parser is not supported on HWS");
return -ENOTSUP;
}
-   if (__atomic_fetch_add(&priv->sh->srh_flex_parser.refcnt, 1, __ATOMIC_RELAXED) + 1 > 1)
+   

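The mlx5_txpp_map_hca_bar() hunk above is the classic install-once idiom: one
strong compare-exchange elects the thread whose mapping becomes visible, and
the loser unmaps its own attempt. A standalone sketch (unmap() is a placeholder
for rte_mem_unmap()):

#include <stddef.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(void *) hca_bar;

static void
unmap(void *addr, size_t len)	/* placeholder for rte_mem_unmap() */
{
	(void)addr;
	(void)len;
}

static void *
publish_mapping(void *mine, size_t len)
{
	void *expected = NULL;

	if (rte_atomic_compare_exchange_strong_explicit(&hca_bar, &expected,
			mine, rte_memory_order_relaxed,
			rte_memory_order_relaxed))
		return mine;		/* we won: our mapping is published */
	unmap(mine, len);		/* another thread won the race */
	return expected;		/* the CAS stored the winner's pointer */
}
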
[PATCH v4 04/45] net/ice: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/ice/base/ice_osdep.h |  4 ++--
 drivers/net/ice/ice_dcf.c|  6 +++---
 drivers/net/ice/ice_dcf.h|  2 +-
 drivers/net/ice/ice_dcf_ethdev.c |  8 
 drivers/net/ice/ice_dcf_parent.c | 16 
 drivers/net/ice/ice_ethdev.c | 12 ++--
 drivers/net/ice/ice_ethdev.h |  2 +-
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ice/base/ice_osdep.h b/drivers/net/ice/base/ice_osdep.h
index 0e14b93..c17f1bf 100644
--- a/drivers/net/ice/base/ice_osdep.h
+++ b/drivers/net/ice/base/ice_osdep.h
@@ -235,7 +235,7 @@ struct ice_lock {
 ice_alloc_dma_mem(__rte_unused struct ice_hw *hw,
  struct ice_dma_mem *mem, u64 size)
 {
-   static uint64_t ice_dma_memzone_id;
+   static RTE_ATOMIC(uint64_t) ice_dma_memzone_id;
const struct rte_memzone *mz = NULL;
char z_name[RTE_MEMZONE_NAMESIZE];
 
@@ -243,7 +243,7 @@ struct ice_lock {
return NULL;
 
snprintf(z_name, sizeof(z_name), "ice_dma_%" PRIu64,
-   __atomic_fetch_add(&ice_dma_memzone_id, 1, __ATOMIC_RELAXED));
+   rte_atomic_fetch_add_explicit(&ice_dma_memzone_id, 1, rte_memory_order_relaxed));
mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY, 0,
 0, RTE_PGSIZE_2M);
if (!mz)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 7f8f516..204d4ea 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -764,7 +764,7 @@ struct virtchnl_proto_hdrs ice_dcf_inner_ipv6_sctp_tmplt = {
rte_spinlock_init(&hw->vc_cmd_queue_lock);
TAILQ_INIT(&hw->vc_cmd_queue);
 
-   __atomic_store_n(&hw->vsi_update_thread_num, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&hw->vsi_update_thread_num, 0, rte_memory_order_relaxed);
 
hw->arq_buf = rte_zmalloc("arq_buf", ICE_DCF_AQ_BUF_SZ, 0);
if (hw->arq_buf == NULL) {
@@ -888,8 +888,8 @@ struct virtchnl_proto_hdrs ice_dcf_inner_ipv6_sctp_tmplt = {
 ice_dcf_dev_interrupt_handler, hw);
 
/* Wait for all `ice-thread` threads to exit. */
-   while (__atomic_load_n(&hw->vsi_update_thread_num,
-   __ATOMIC_ACQUIRE) != 0)
+   while (rte_atomic_load_explicit(&hw->vsi_update_thread_num,
+   rte_memory_order_acquire) != 0)
rte_delay_ms(ICE_DCF_CHECK_INTERVAL);
 
ice_dcf_mode_disable(hw);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index aa2a723..7726681 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -105,7 +105,7 @@ struct ice_dcf_hw {
void (*vc_event_msg_cb)(struct ice_dcf_hw *dcf_hw,
uint8_t *msg, uint16_t msglen);
 
-   int vsi_update_thread_num;
+   RTE_ATOMIC(int) vsi_update_thread_num;
 
uint8_t *arq_buf;
 
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index d58ec9d..8f3a385 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1743,7 +1743,7 @@ static int ice_dcf_xstats_get(struct rte_eth_dev *dev,
 ice_dcf_adminq_need_retry(struct ice_adapter *ad)
 {
return ad->hw.dcf_enabled &&
-  !__atomic_load_n(&ad->dcf_state_on, __ATOMIC_RELAXED);
+  !rte_atomic_load_explicit(&ad->dcf_state_on, rte_memory_order_relaxed);
 }
 
 /* Add UDP tunneling port */
@@ -1944,12 +1944,12 @@ static int ice_dcf_xstats_get(struct rte_eth_dev *dev,
adapter->real_hw.vc_event_msg_cb = ice_dcf_handle_pf_event_msg;
if (ice_dcf_init_hw(eth_dev, &adapter->real_hw) != 0) {
PMD_INIT_LOG(ERR, "Failed to init DCF hardware");
-   __atomic_store_n(&parent_adapter->dcf_state_on, false,
-__ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&parent_adapter->dcf_state_on, false,
+rte_memory_order_relaxed);
return -1;
}
 
-   __atomic_store_n(&parent_adapter->dcf_state_on, true, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&parent_adapter->dcf_state_on, true, rte_memory_order_relaxed);
 
if (ice_dcf_init_parent_adapter(eth_dev) != 0) {
PMD_INIT_LOG(ERR, "Failed to init DCF parent adapter");
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 6e845f4..a478b69 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -123,8 +123,8 @@ struct ice_dcf_reset_event_param {
container_of(hw, struct ice_dcf_adapter, real_hw);
struct ice_adapter *parent_adapter = &adapter->parent;
 
-   __atomic_fetch_add(&hw->vsi_update_thread_num, 1,
-  

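The ice hunks above preserve a thread-draining idiom worth spelling out: worker
threads bump the counter on entry and drop it on exit, and teardown spins on an
acquire load so that everything the workers wrote beforehand is visible once
the counter reaches zero. A standalone sketch:

#include <rte_cycles.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(int) vsi_update_thread_num;

static void
wait_for_workers(void)
{
	while (rte_atomic_load_explicit(&vsi_update_thread_num,
			rte_memory_order_acquire) != 0)
		rte_delay_ms(1);
}
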
[PATCH v4 03/45] net/iavf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/iavf/iavf.h   | 16 
 drivers/net/iavf/iavf_rxtx.c  |  4 ++--
 drivers/net/iavf/iavf_rxtx_vec_neon.c |  2 +-
 drivers/net/iavf/iavf_vchnl.c | 14 +++---
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 7ab41c9..ad526c6 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -238,8 +238,8 @@ struct iavf_info {
struct virtchnl_vlan_caps vlan_v2_caps;
uint64_t supported_rxdid;
uint8_t *proto_xtr; /* proto xtr type for all queues */
-   volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
-   uint32_t pend_cmd_count;
+   volatile RTE_ATOMIC(enum virtchnl_ops) pend_cmd; /* pending command not finished */
+   RTE_ATOMIC(uint32_t) pend_cmd_count;
int cmd_retval; /* return value of the cmd response from PF */
uint8_t *aq_resp; /* buffer to store the adminq response from PF */
 
@@ -456,13 +456,13 @@ struct iavf_cmd_info {
 _atomic_set_cmd(struct iavf_info *vf, enum virtchnl_ops ops)
 {
enum virtchnl_ops op_unk = VIRTCHNL_OP_UNKNOWN;
-   int ret = __atomic_compare_exchange(&vf->pend_cmd, &op_unk, &ops,
-   0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+   int ret = rte_atomic_compare_exchange_strong_explicit(&vf->pend_cmd, &op_unk, ops,
+   rte_memory_order_acquire, rte_memory_order_acquire);
 
if (!ret)
PMD_DRV_LOG(ERR, "There is incomplete cmd %d", vf->pend_cmd);
 
-   __atomic_store_n(&vf->pend_cmd_count, 1, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&vf->pend_cmd_count, 1, rte_memory_order_relaxed);
 
return !ret;
 }
@@ -472,13 +472,13 @@ struct iavf_cmd_info {
 _atomic_set_async_response_cmd(struct iavf_info *vf, enum virtchnl_ops ops)
 {
enum virtchnl_ops op_unk = VIRTCHNL_OP_UNKNOWN;
-   int ret = __atomic_compare_exchange(&vf->pend_cmd, &op_unk, &ops,
-   0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+   int ret = rte_atomic_compare_exchange_strong_explicit(&vf->pend_cmd, &op_unk, ops,
+   rte_memory_order_acquire, rte_memory_order_acquire);
 
if (!ret)
PMD_DRV_LOG(ERR, "There is incomplete cmd %d", vf->pend_cmd);
 
-   __atomic_store_n(&vf->pend_cmd_count, 2, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&vf->pend_cmd_count, 2, rte_memory_order_relaxed);
 
return !ret;
 }
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 59a0b9e..ecc3143 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2025,7 +2025,7 @@ struct iavf_txq_ops iavf_txq_release_mbufs_ops[] = {
s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 
	/* This barrier is to order loads of different words in the descriptor */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
/* Compute how many contiguous DD bits were set */
for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) {
@@ -2152,7 +2152,7 @@ struct iavf_txq_ops iavf_txq_release_mbufs_ops[] = {
}
 
	/* This barrier is to order loads of different words in the descriptor */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
/* Compute how many contiguous DD bits were set */
for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) {
diff --git a/drivers/net/iavf/iavf_rxtx_vec_neon.c 
b/drivers/net/iavf/iavf_rxtx_vec_neon.c
index 83825aa..20b656e 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_neon.c
@@ -273,7 +273,7 @@
descs[0] =  vld1q_u64((uint64_t *)(rxdp));
 
/* Use acquire fence to order loads of descriptor qwords */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
/* A.2 reload qword0 to make it ordered after qword1 load */
descs[3] = vld1q_lane_u64((uint64_t *)(rxdp + 3), descs[3], 0);
descs[2] = vld1q_lane_u64((uint64_t *)(rxdp + 2), descs[2], 0);
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index d30..6d5969f 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -41,7 +41,7 @@ struct iavf_event_element {
 };
 
 struct iavf_event_handler {
-   uint32_t ndev;
+   RTE_ATOMIC(uint32_t) ndev;
rte_thread_t tid;
int fd[2];
pthread_mutex_t lock;
@@ -129,7 +129,7 @@ struct iavf_event_hand

[PATCH v4 05/45] net/i40e: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/i40e/i40e_ethdev.c| 4 ++--
 drivers/net/i40e/i40e_rxtx.c  | 6 +++---
 drivers/net/i40e/i40e_rxtx_vec_neon.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 380ce1a..801cc95 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -4687,7 +4687,7 @@ enum i40e_status_code
u64 size,
u32 alignment)
 {
-   static uint64_t i40e_dma_memzone_id;
+   static RTE_ATOMIC(uint64_t) i40e_dma_memzone_id;
const struct rte_memzone *mz = NULL;
char z_name[RTE_MEMZONE_NAMESIZE];
 
@@ -4695,7 +4695,7 @@ enum i40e_status_code
return I40E_ERR_PARAM;
 
snprintf(z_name, sizeof(z_name), "i40e_dma_%" PRIu64,
-   __atomic_fetch_add(&i40e_dma_memzone_id, 1, __ATOMIC_RELAXED));
+   rte_atomic_fetch_add_explicit(&i40e_dma_memzone_id, 1, 
rte_memory_order_relaxed));
mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY,
RTE_MEMZONE_IOVA_CONTIG, alignment, RTE_PGSIZE_2M);
if (!mz)
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 5d25ab4..155f243 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -486,7 +486,7 @@
}
 
/* This barrier is to order loads of different words in the 
descriptor */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
/* Compute how many status bits were set */
for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) {
@@ -745,7 +745,7 @@
 * Use acquire fence to ensure that qword1 which includes DD
 * bit is loaded before loading of other descriptor words.
 */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
rxd = *rxdp;
nb_hold++;
@@ -867,7 +867,7 @@
 * Use acquire fence to ensure that qword1 which includes DD
 * bit is loaded before loading of other descriptor words.
 */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
rxd = *rxdp;
nb_hold++;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c 
b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index d873e30..3a99137 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -425,7 +425,7 @@
descs[0] =  vld1q_u64((uint64_t *)(rxdp));
 
/* Use acquire fence to order loads of descriptor qwords */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
/* A.2 reload qword0 to make it ordered after qword1 load */
descs[3] = vld1q_lane_u64((uint64_t *)(rxdp + 3), descs[3], 0);
descs[2] = vld1q_lane_u64((uint64_t *)(rxdp + 2), descs[2], 0);
-- 
1.8.3.1

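The acquire fences in the i40e/iavf rx paths above all protect the same
invariant: the qword carrying the DD ("descriptor done") bit must be read
before the other descriptor words, or the CPU could pair a fresh DD bit with
stale data. A standalone sketch (the descriptor layout and DD bit position
here are illustrative):

#include <stdint.h>
#include <rte_atomic.h>

struct rx_desc {
	uint64_t qword0;
	uint64_t qword1;	/* status/error, includes the DD bit */
};

static int
read_desc(volatile struct rx_desc *rxdp, struct rx_desc *out)
{
	uint64_t qw1 = rxdp->qword1;

	if (!(qw1 & 1))		/* DD clear: descriptor not done yet */
		return -1;
	rte_atomic_thread_fence(rte_memory_order_acquire);
	out->qword0 = rxdp->qword0;	/* ordered after the DD load */
	out->qword1 = qw1;
	return 0;
}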


[PATCH v4 06/45] net/hns3: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/hns3/hns3_cmd.c   | 18 ++--
 drivers/net/hns3/hns3_dcb.c   |  2 +-
 drivers/net/hns3/hns3_ethdev.c| 36 +++
 drivers/net/hns3/hns3_ethdev.h| 32 ++---
 drivers/net/hns3/hns3_ethdev_vf.c | 60 +++
 drivers/net/hns3/hns3_intr.c  | 36 +++
 drivers/net/hns3/hns3_intr.h  |  4 +--
 drivers/net/hns3/hns3_mbx.c   |  6 ++--
 drivers/net/hns3/hns3_mp.c|  6 ++--
 drivers/net/hns3/hns3_rxtx.c  | 10 +++
 drivers/net/hns3/hns3_tm.c|  4 +--
 11 files changed, 107 insertions(+), 107 deletions(-)

diff --git a/drivers/net/hns3/hns3_cmd.c b/drivers/net/hns3/hns3_cmd.c
index 001ff49..3c5fdbe 100644
--- a/drivers/net/hns3/hns3_cmd.c
+++ b/drivers/net/hns3/hns3_cmd.c
@@ -44,12 +44,12 @@
 hns3_allocate_dma_mem(struct hns3_hw *hw, struct hns3_cmq_ring *ring,
  uint64_t size, uint32_t alignment)
 {
-   static uint64_t hns3_dma_memzone_id;
+   static RTE_ATOMIC(uint64_t) hns3_dma_memzone_id;
const struct rte_memzone *mz = NULL;
char z_name[RTE_MEMZONE_NAMESIZE];
 
snprintf(z_name, sizeof(z_name), "hns3_dma_%" PRIu64,
-   __atomic_fetch_add(&hns3_dma_memzone_id, 1, __ATOMIC_RELAXED));
+   rte_atomic_fetch_add_explicit(&hns3_dma_memzone_id, 1, rte_memory_order_relaxed));
mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY,
 RTE_MEMZONE_IOVA_CONTIG, alignment,
 RTE_PGSIZE_2M);
@@ -198,8 +198,8 @@
hns3_err(hw, "wrong cmd addr(%0x) head (%u, %u-%u)", addr, head,
 csq->next_to_use, csq->next_to_clean);
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-   __atomic_store_n(&hw->reset.disable_cmd, 1,
-__ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&hw->reset.disable_cmd, 1,
+rte_memory_order_relaxed);
hns3_schedule_delayed_reset(HNS3_DEV_HW_TO_ADAPTER(hw));
}
 
@@ -313,7 +313,7 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw)
if (hns3_cmd_csq_done(hw))
return 0;
 
-   if (__atomic_load_n(&hw->reset.disable_cmd, __ATOMIC_RELAXED)) {
+   if (rte_atomic_load_explicit(&hw->reset.disable_cmd, rte_memory_order_relaxed)) {
hns3_err(hw,
 "Don't wait for reply because of disable_cmd");
return -EBUSY;
@@ -360,7 +360,7 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw)
int retval;
uint32_t ntc;
 
-   if (__atomic_load_n(&hw->reset.disable_cmd, __ATOMIC_RELAXED))
+   if (rte_atomic_load_explicit(&hw->reset.disable_cmd, rte_memory_order_relaxed))
return -EBUSY;
 
rte_spinlock_lock(&hw->cmq.csq.lock);
@@ -747,7 +747,7 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw)
ret = -EBUSY;
goto err_cmd_init;
}
-   __atomic_store_n(&hw->reset.disable_cmd, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&hw->reset.disable_cmd, 0, rte_memory_order_relaxed);
 
ret = hns3_cmd_query_firmware_version_and_capability(hw);
if (ret) {
@@ -790,7 +790,7 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw)
return 0;
 
 err_cmd_init:
-   __atomic_store_n(&hw->reset.disable_cmd, 1, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&hw->reset.disable_cmd, 1, rte_memory_order_relaxed);
return ret;
 }
 
@@ -819,7 +819,7 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw)
if (!hns->is_vf)
(void)hns3_firmware_compat_config(hw, false);
 
-   __atomic_store_n(&hw->reset.disable_cmd, 1, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&hw->reset.disable_cmd, 1, rte_memory_order_relaxed);
 
/*
 * A delay is added to ensure that the register cleanup operations
diff --git a/drivers/net/hns3/hns3_dcb.c b/drivers/net/hns3/hns3_dcb.c
index 915e4eb..2f917fe 100644
--- a/drivers/net/hns3/hns3_dcb.c
+++ b/drivers/net/hns3/hns3_dcb.c
@@ -648,7 +648,7 @@
 * and configured directly to the hardware in the RESET_STAGE_RESTORE
 * stage of the reset process.
 */
-   if (__atomic_load_n(&hw->reset.resetting, __ATOMIC_RELAXED) == 0) {
+   if (rte_atomic_load_explicit(&hw->reset.resetting, rte_memory_order_relaxed) == 0) {
for (i = 0; i < hw->rss_ind_tbl_size; i++)
rss_cfg->rss_indirection_tbl[i] =
i % hw->allo

[PATCH v4 07/45] net/bnxt: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/bnxt/bnxt_cpr.h   |  4 ++--
 drivers/net/bnxt/bnxt_rxq.h   |  2 +-
 drivers/net/bnxt/bnxt_rxr.c   | 13 -
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c |  2 +-
 drivers/net/bnxt/bnxt_stats.c |  4 ++--
 5 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index c7b3480..43f06fd 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -107,7 +107,7 @@ struct bnxt_cp_ring_info {
 
 /**
  * Check validity of a completion ring entry. If the entry is valid, include a
- * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields in the
+ * C11 rte_memory_order_acquire fence to ensure that subsequent loads of fields in the
  * completion are not hoisted by the compiler or by the CPU to come before the
  * loading of the "valid" field.
  *
@@ -130,7 +130,7 @@ struct bnxt_cp_ring_info {
expected = !(raw_cons & ring_size);
valid = !!(rte_le_to_cpu_32(c->info3_v) & CMPL_BASE_V);
if (valid == expected) {
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
return true;
}
return false;
diff --git a/drivers/net/bnxt/bnxt_rxq.h b/drivers/net/bnxt/bnxt_rxq.h
index 77bc382..36e0ac3 100644
--- a/drivers/net/bnxt/bnxt_rxq.h
+++ b/drivers/net/bnxt/bnxt_rxq.h
@@ -40,7 +40,7 @@ struct bnxt_rx_queue {
struct bnxt_rx_ring_info*rx_ring;
struct bnxt_cp_ring_info*cp_ring;
struct rte_mbuf fake_mbuf;
-   uint64_trx_mbuf_alloc_fail;
+   RTE_ATOMIC(uint64_t)rx_mbuf_alloc_fail;
uint8_t need_realloc;
const struct rte_memzone *mz;
 };
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 957b7d6..69e8384 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -49,7 +49,8 @@ static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue 
*rxq,
rx_buf = &rxr->rx_buf_ring[prod];
mbuf = __bnxt_alloc_rx_data(rxq->mb_pool);
if (!mbuf) {
-   __atomic_fetch_add(&rxq->rx_mbuf_alloc_fail, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_mbuf_alloc_fail, 1,
+   rte_memory_order_relaxed);
/* If buff has failed already, setting this again won't hurt */
rxq->need_realloc = 1;
return -ENOMEM;
@@ -86,7 +87,8 @@ static inline int bnxt_alloc_ag_data(struct bnxt_rx_queue 
*rxq,
 
mbuf = __bnxt_alloc_rx_data(rxq->mb_pool);
if (!mbuf) {
-   __atomic_fetch_add(&rxq->rx_mbuf_alloc_fail, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_mbuf_alloc_fail, 1,
+   rte_memory_order_relaxed);
/* If buff has failed already, setting this again won't hurt */
rxq->need_realloc = 1;
return -ENOMEM;
@@ -465,7 +467,8 @@ static inline struct rte_mbuf *bnxt_tpa_end(
struct rte_mbuf *new_data = __bnxt_alloc_rx_data(rxq->mb_pool);
RTE_ASSERT(new_data != NULL);
if (!new_data) {
-   __atomic_fetch_add(&rxq->rx_mbuf_alloc_fail, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_mbuf_alloc_fail, 1,
+   rte_memory_order_relaxed);
return NULL;
}
tpa_info->mbuf = new_data;
@@ -1677,8 +1680,8 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
rxr->tpa_info[i].mbuf =
__bnxt_alloc_rx_data(rxq->mb_pool);
if (!rxr->tpa_info[i].mbuf) {
-   __atomic_fetch_add(&rxq->rx_mbuf_alloc_fail, 1,
-   __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_mbuf_alloc_fail, 1,
+   rte_memory_order_relaxed);
return -ENOMEM;
}
}
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c 
b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 775400f..04864e0 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -240,7 +240,7 @@
rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
 
/* Use acquire fence to order loads of descriptor words. */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
   

[PATCH v4 08/45] net/cpfl: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/cpfl/cpfl_ethdev.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index ef19aa1..5b47e22 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -300,8 +300,9 @@ struct rte_cpfl_xstats_name_off {
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
cpfl_rxq = dev->data->rx_queues[i];
-   mbuf_alloc_failed += __atomic_load_n(&cpfl_rxq->base.rx_stats.mbuf_alloc_failed,
-__ATOMIC_RELAXED);
+   mbuf_alloc_failed +=
+   rte_atomic_load_explicit(&cpfl_rxq->base.rx_stats.mbuf_alloc_failed,
+   rte_memory_order_relaxed);
}
 
return mbuf_alloc_failed;
@@ -349,7 +350,8 @@ struct rte_cpfl_xstats_name_off {
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
cpfl_rxq = dev->data->rx_queues[i];
-   __atomic_store_n(&cpfl_rxq->base.rx_stats.mbuf_alloc_failed, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&cpfl_rxq->base.rx_stats.mbuf_alloc_failed, 0,
+   rte_memory_order_relaxed);
}
 }
 
-- 
1.8.3.1
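
The cpfl change above shows the recipe the whole series follows: the shared
counter is re-declared with RTE_ATOMIC() and every access goes through an
explicit-memory-order helper. Below is a minimal, self-contained sketch of
that recipe; the names (demo_rx_stats, demo_stats_bump) are hypothetical and
not taken from any driver in the patches.

#include <stdint.h>
#include <rte_stdatomic.h>

struct demo_rx_stats {
	RTE_ATOMIC(uint64_t) mbuf_alloc_failed; /* bumped from datapath threads */
};

/* Datapath: count a failure; a plain statistics counter needs no ordering. */
static inline void
demo_stats_bump(struct demo_rx_stats *st)
{
	rte_atomic_fetch_add_explicit(&st->mbuf_alloc_failed, 1,
				      rte_memory_order_relaxed);
}

/* Control path: read and clear the counter. */
static inline uint64_t
demo_stats_read_clear(struct demo_rx_stats *st)
{
	uint64_t v = rte_atomic_load_explicit(&st->mbuf_alloc_failed,
					      rte_memory_order_relaxed);

	rte_atomic_store_explicit(&st->mbuf_alloc_failed, 0,
				  rte_memory_order_relaxed);
	return v;
}
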



[PATCH v4 10/45] net/octeon_ep: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/octeon_ep/cnxk_ep_rx.h| 5 +++--
 drivers/net/octeon_ep/cnxk_ep_tx.c| 5 +++--
 drivers/net/octeon_ep/cnxk_ep_vf.c| 8 
 drivers/net/octeon_ep/otx2_ep_vf.c| 8 
 drivers/net/octeon_ep/otx_ep_common.h | 4 ++--
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 6 --
 6 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.h b/drivers/net/octeon_ep/cnxk_ep_rx.h
index ecf95cd..9422042 100644
--- a/drivers/net/octeon_ep/cnxk_ep_rx.h
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.h
@@ -98,7 +98,7 @@
 * This adds an extra local variable, but almost halves the
 * number of PCIe writes.
 */
-   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+   val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);
 
new_pkts = val - droq->pkts_sent_prev;
droq->pkts_sent_prev = val;
@@ -111,7 +111,8 @@
rte_mb();
 
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+   while (rte_atomic_load_explicit(droq->pkts_sent_ism,
+   rte_memory_order_relaxed) >= val) {
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
rte_mb();
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
index 233c8aa..e093140 100644
--- a/drivers/net/octeon_ep/cnxk_ep_tx.c
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -15,7 +15,7 @@
 * This adds an extra local variable, but almost halves the
 * number of PCIe writes.
 */
-   val = __atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED);
+   val = rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
iq->inst_cnt += val - iq->inst_cnt_prev;
iq->inst_cnt_prev = val;
 
@@ -27,7 +27,8 @@
rte_mb();
 
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
-   while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+   while (rte_atomic_load_explicit(iq->inst_cnt_ism,
+   rte_memory_order_relaxed) >= val) {
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
rte_mb();
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 39f357e..39b28de 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -150,10 +150,10 @@
rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
CNXK_EP_R_IN_CNTS_ISM(iq_no));
iq->inst_cnt_ism =
-   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
+   (uint32_t __rte_atomic *)((uint8_t *)otx_ep->ism_buffer_mz->addr
 + CNXK_EP_IQ_ISM_OFFSET(iq_no));
otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%" PRIX64, iq_no,
-  (void *)iq->inst_cnt_ism, ism_addr);
+  (void *)(uintptr_t)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_prev = 0;
iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
@@ -235,10 +235,10 @@
rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
CNXK_EP_R_OUT_CNTS_ISM(oq_no));
droq->pkts_sent_ism =
-   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
+   (uint32_t __rte_atomic *)((uint8_t *)otx_ep->ism_buffer_mz->addr
 + CNXK_EP_OQ_ISM_OFFSET(oq_no));
otx_ep_err("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64,
-   oq_no, (void *)droq->pkts_sent_ism, ism_addr);
+   oq_no, (void *)(uintptr_t)droq->pkts_sent_ism, ism_addr);
*droq->pkts_sent_ism = 0;
droq->pkts_sent_prev = 0;
 
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index 25e0e5a..2aeebb4 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -300,10 +300,10 @@ static int otx2_vf_enable_rxq_intr(struct otx_ep_device *otx_epvf,
oct_ep_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
SDP_VF_R_IN_CNTS_ISM(iq_no));
iq->inst_cnt_ism =
-   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
+   (uint32_t __rte_atomic *)((uint8_t *)otx_ep->ism_buffer_mz->addr
 + OTX2_EP_IQ_ISM_OFFSET(iq_no));
otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%x", iq_no,
-  (void *)iq->inst_cnt_ism,
+  (void *)(uintptr_t)iq->inst_cnt_ism,
   (unsigned int)ism_addr);
*iq->i

[PATCH v4 09/45] net/af_xdp: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 268a130..4833180 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -116,7 +116,7 @@ struct xsk_umem_info {
const struct rte_memzone *mz;
struct rte_mempool *mb_pool;
void *buffer;
-   uint8_t refcnt;
+   RTE_ATOMIC(uint8_t) refcnt;
uint32_t max_xsks;
 };
 
@@ -995,7 +995,8 @@ static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags)
break;
xsk_socket__delete(rxq->xsk);
 
-   if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 == 0)
+   if (rte_atomic_fetch_sub_explicit(&rxq->umem->refcnt, 1,
+   rte_memory_order_acquire) - 1 == 0)
xdp_umem_destroy(rxq->umem);
 
/* free pkt_tx_queue */
@@ -1097,8 +1098,8 @@ static inline uintptr_t get_base_addr(struct rte_mempool *mp, uint64_t *align)
ret = -1;
goto out;
}
-   if (__atomic_load_n(&internals->rx_queues[i].umem->refcnt,
-   __ATOMIC_ACQUIRE)) {
+   if (rte_atomic_load_explicit(&internals->rx_queues[i].umem->refcnt,
+   rte_memory_order_acquire)) {
*umem = internals->rx_queues[i].umem;
goto out;
}
@@ -1131,11 +1132,11 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
return NULL;
 
if (umem != NULL &&
-   __atomic_load_n(&umem->refcnt, __ATOMIC_ACQUIRE) <
+   rte_atomic_load_explicit(&umem->refcnt, rte_memory_order_acquire) <
umem->max_xsks) {
AF_XDP_LOG(INFO, "%s,qid%i sharing UMEM\n",
internals->if_name, rxq->xsk_queue_idx);
-   __atomic_fetch_add(&umem->refcnt, 1, __ATOMIC_ACQUIRE);
+   rte_atomic_fetch_add_explicit(&umem->refcnt, 1, rte_memory_order_acquire);
}
}
 
@@ -1177,7 +1178,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
mb_pool->name, umem->max_xsks);
}
 
-   __atomic_store_n(&umem->refcnt, 1, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&umem->refcnt, 1, rte_memory_order_release);
}
 
return umem;
@@ -1606,7 +1607,8 @@ struct msg_internal {
if (rxq->umem == NULL)
return -ENOMEM;
txq->umem = rxq->umem;
-   reserve_before = __atomic_load_n(&rxq->umem->refcnt, __ATOMIC_ACQUIRE) <= 1;
+   reserve_before = rte_atomic_load_explicit(&rxq->umem->refcnt,
+   rte_memory_order_acquire) <= 1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size);
@@ -1723,7 +1725,7 @@ struct msg_internal {
 out_xsk:
xsk_socket__delete(rxq->xsk);
 out_umem:
-   if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 == 0)
+   if (rte_atomic_fetch_sub_explicit(&rxq->umem->refcnt, 1, rte_memory_order_acquire) - 1 == 0)
xdp_umem_destroy(rxq->umem);
 
return ret;
-- 
1.8.3.1
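
The refcnt handling above relies on rte_atomic_fetch_sub_explicit() returning
the value *before* the subtraction, so "- 1 == 0" detects the last reference.
A small sketch under hypothetical names (demo_umem, demo_umem_put), not the
af_xdp structures themselves:

#include <stdint.h>
#include <rte_stdatomic.h>

struct demo_umem {
	RTE_ATOMIC(uint8_t) refcnt;
};

static void demo_umem_destroy(struct demo_umem *u); /* assumed elsewhere */

static void
demo_umem_put(struct demo_umem *u)
{
	/* Acquire ordering keeps the destroy from being reordered before
	 * this thread's earlier uses of the object. */
	if (rte_atomic_fetch_sub_explicit(&u->refcnt, 1,
	    rte_memory_order_acquire) - 1 == 0)
		demo_umem_destroy(u);
}
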



[PATCH v4 11/45] net/octeontx: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/octeontx/octeontx_ethdev.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index e397136..3c21540 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -31,7 +31,7 @@
 /* Useful in stopping/closing event device if no of
  * eth ports are using it.
  */
-uint16_t evdev_refcnt;
+RTE_ATOMIC(uint16_t) evdev_refcnt;
 
 #define OCTEONTX_QLM_MODE_SGMII  7
 #define OCTEONTX_QLM_MODE_XFI   12
@@ -559,7 +559,7 @@ enum octeontx_link_speed {
return 0;
 
/* Stopping/closing event device once all eth ports are closed. */
-   if (__atomic_fetch_sub(&evdev_refcnt, 1, __ATOMIC_ACQUIRE) - 1 == 0) {
+   if (rte_atomic_fetch_sub_explicit(&evdev_refcnt, 1, rte_memory_order_acquire) - 1 == 0) {
rte_event_dev_stop(nic->evdev);
rte_event_dev_close(nic->evdev);
}
@@ -1593,7 +1593,7 @@ static void build_xstat_names(struct rte_eth_xstat_name *xstat_names)
nic->pko_vfid = pko_vfid;
nic->port_id = port;
nic->evdev = evdev;
-   __atomic_fetch_add(&evdev_refcnt, 1, __ATOMIC_ACQUIRE);
+   rte_atomic_fetch_add_explicit(&evdev_refcnt, 1, rte_memory_order_acquire);
 
res = octeontx_port_open(nic);
if (res < 0)
@@ -1844,7 +1844,7 @@ static void build_xstat_names(struct rte_eth_xstat_name *xstat_names)
}
}
 
-   __atomic_store_n(&evdev_refcnt, 0, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&evdev_refcnt, 0, rte_memory_order_release);
/*
 * Do 1:1 links for ports & queues. All queues would be mapped to
 * one port. If there are more ports than queues, then some ports
-- 
1.8.3.1



[PATCH v4 12/45] net/cxgbe: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/cxgbe/clip_tbl.c   | 12 ++--
 drivers/net/cxgbe/clip_tbl.h   |  2 +-
 drivers/net/cxgbe/cxgbe_main.c | 20 ++--
 drivers/net/cxgbe/cxgbe_ofld.h |  6 +++---
 drivers/net/cxgbe/l2t.c| 12 ++--
 drivers/net/cxgbe/l2t.h|  2 +-
 drivers/net/cxgbe/mps_tcam.c   | 21 +++--
 drivers/net/cxgbe/mps_tcam.h   |  2 +-
 drivers/net/cxgbe/smt.c| 12 ++--
 drivers/net/cxgbe/smt.h|  2 +-
 10 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/drivers/net/cxgbe/clip_tbl.c b/drivers/net/cxgbe/clip_tbl.c
index b709e26..8588b88 100644
--- a/drivers/net/cxgbe/clip_tbl.c
+++ b/drivers/net/cxgbe/clip_tbl.c
@@ -55,7 +55,7 @@ void cxgbe_clip_release(struct rte_eth_dev *dev, struct clip_entry *ce)
int ret;
 
t4_os_lock(&ce->lock);
-   if (__atomic_fetch_sub(&ce->refcnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+   if (rte_atomic_fetch_sub_explicit(&ce->refcnt, 1, rte_memory_order_relaxed) - 1 == 0) {
ret = clip6_release_mbox(dev, ce->addr);
if (ret)
dev_debug(adap, "CLIP FW DEL CMD failed: %d", ret);
@@ -79,7 +79,7 @@ static struct clip_entry *find_or_alloc_clipe(struct clip_tbl *c,
unsigned int clipt_size = c->clipt_size;
 
for (e = &c->cl_list[0], end = &c->cl_list[clipt_size]; e != end; ++e) {
-   if (__atomic_load_n(&e->refcnt, __ATOMIC_RELAXED) == 0) {
+   if (rte_atomic_load_explicit(&e->refcnt, rte_memory_order_relaxed) == 0) {
if (!first_free)
first_free = e;
} else {
@@ -114,12 +114,12 @@ static struct clip_entry *t4_clip_alloc(struct rte_eth_dev *dev,
ce = find_or_alloc_clipe(ctbl, lip);
if (ce) {
t4_os_lock(&ce->lock);
-   if (__atomic_load_n(&ce->refcnt, __ATOMIC_RELAXED) == 0) {
+   if (rte_atomic_load_explicit(&ce->refcnt, rte_memory_order_relaxed) == 0) {
rte_memcpy(ce->addr, lip, sizeof(ce->addr));
if (v6) {
ce->type = FILTER_TYPE_IPV6;
-   __atomic_store_n(&ce->refcnt, 1,
-__ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&ce->refcnt, 1,
+rte_memory_order_relaxed);
ret = clip6_get_mbox(dev, lip);
if (ret)
dev_debug(adap,
@@ -129,7 +129,7 @@ static struct clip_entry *t4_clip_alloc(struct rte_eth_dev *dev,
ce->type = FILTER_TYPE_IPV4;
}
} else {
-   __atomic_fetch_add(&ce->refcnt, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&ce->refcnt, 1, rte_memory_order_relaxed);
}
t4_os_unlock(&ce->lock);
}
diff --git a/drivers/net/cxgbe/clip_tbl.h b/drivers/net/cxgbe/clip_tbl.h
index 3b2be66..439fcf6 100644
--- a/drivers/net/cxgbe/clip_tbl.h
+++ b/drivers/net/cxgbe/clip_tbl.h
@@ -13,7 +13,7 @@ struct clip_entry {
enum filter_type type;   /* entry type */
u32 addr[4]; /* IPV4 or IPV6 address */
rte_spinlock_t lock; /* entry lock */
-   u32 refcnt;  /* entry reference count */
+   RTE_ATOMIC(u32) refcnt;  /* entry reference count */
 };
 
 struct clip_tbl {
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index c479454..2ed21f2 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -418,15 +418,15 @@ void cxgbe_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid,
 
if (t->tid_tab[tid]) {
t->tid_tab[tid] = NULL;
-   __atomic_fetch_sub(&t->conns_in_use, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(&t->conns_in_use, 1, rte_memory_order_relaxed);
if (t->hash_base && tid >= t->hash_base) {
if (family == FILTER_TYPE_IPV4)
-   __atomic_fetch_sub(&t->hash_tids_in_use, 1,
-  __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(&t->hash_tids_in_use, 1,
+  rte_memory_order_relaxed);
} else {
if (family == FILTER_TYPE_IPV4)
-   __atomic_fetch_sub(&t->tids_in_use, 1,
-  __ATOMIC_RELAXED);
+  

[PATCH v4 13/45] net/gve: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/gve/base/gve_osdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/gve/base/gve_osdep.h b/drivers/net/gve/base/gve_osdep.h
index a3702f4..c0ee0d5 100644
--- a/drivers/net/gve/base/gve_osdep.h
+++ b/drivers/net/gve/base/gve_osdep.h
@@ -135,7 +135,7 @@ struct gve_dma_mem {
 static inline void *
 gve_alloc_dma_mem(struct gve_dma_mem *mem, u64 size)
 {
-   static uint16_t gve_dma_memzone_id;
+   static RTE_ATOMIC(uint16_t) gve_dma_memzone_id;
const struct rte_memzone *mz = NULL;
char z_name[RTE_MEMZONE_NAMESIZE];
 
@@ -143,7 +143,7 @@ struct gve_dma_mem {
return NULL;
 
snprintf(z_name, sizeof(z_name), "gve_dma_%u",
-__atomic_fetch_add(&gve_dma_memzone_id, 1, __ATOMIC_RELAXED));
+rte_atomic_fetch_add_explicit(&gve_dma_memzone_id, 1, rte_memory_order_relaxed));
mz = rte_memzone_reserve_aligned(z_name, size, SOCKET_ID_ANY,
 RTE_MEMZONE_IOVA_CONTIG,
 PAGE_SIZE);
-- 
1.8.3.1
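
The gve change is a compact illustration of a relaxed counter used purely for
uniqueness: concurrent callers each draw a distinct memzone suffix. A
hypothetical sketch of the same idea (demo_zone_id, demo_make_name are made
up for illustration):

#include <stdio.h>
#include <stdint.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(uint16_t) demo_zone_id;

/* Each call yields a distinct name, even from multiple threads. */
static void
demo_make_name(char *buf, size_t len)
{
	snprintf(buf, len, "demo_dma_%u",
	    (unsigned)rte_atomic_fetch_add_explicit(&demo_zone_id, 1,
		rte_memory_order_relaxed));
}
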



[PATCH v4 14/45] net/memif: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/memif/memif.h |  4 ++--
 drivers/net/memif/rte_eth_memif.c | 50 +++
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
index f5a4693..3f5b407 100644
--- a/drivers/net/memif/memif.h
+++ b/drivers/net/memif/memif.h
@@ -169,9 +169,9 @@ typedef struct __rte_packed __rte_aligned(128)
uint32_t cookie;/**< MEMIF_COOKIE */
uint16_t flags; /**< flags */
 #define MEMIF_RING_FLAG_MASK_INT 1 /**< disable interrupt mode */
-   uint16_t head;  /**< pointer to ring buffer head */
+   RTE_ATOMIC(uint16_t) head;  /**< pointer to ring buffer head */
MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
-   uint16_t tail;  /**< pointer to ring buffer tail */
+   RTE_ATOMIC(uint16_t) tail;  /**< pointer to ring buffer tail */
MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
memif_desc_t desc[0];   /**< buffer descriptors */
 } memif_ring_t;
diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index 18377d9..16da22b 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -262,7 +262,7 @@ struct mp_region_msg {
 * threads, so using load-acquire pairs with store-release
 * in function eth_memif_rx for C2S queues.
 */
-   cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
+   cur_tail = rte_atomic_load_explicit(&ring->tail, rte_memory_order_acquire);
while (mq->last_tail != cur_tail) {
RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
@@ -334,10 +334,10 @@ struct mp_region_msg {
 
if (type == MEMIF_RING_C2S) {
cur_slot = mq->last_head;
-   last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
+   last_slot = rte_atomic_load_explicit(&ring->head, rte_memory_order_acquire);
} else {
cur_slot = mq->last_tail;
-   last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
+   last_slot = rte_atomic_load_explicit(&ring->tail, rte_memory_order_acquire);
}
 
if (cur_slot == last_slot)
@@ -473,7 +473,7 @@ struct mp_region_msg {
 
 no_free_bufs:
if (type == MEMIF_RING_C2S) {
-   __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&ring->tail, cur_slot, rte_memory_order_release);
mq->last_head = cur_slot;
} else {
mq->last_tail = cur_slot;
@@ -485,7 +485,7 @@ struct mp_region_msg {
 * is called in the context of receiver thread. The loads in
 * the receiver do not need to synchronize with its own stores.
 */
-   head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
+   head = rte_atomic_load_explicit(&ring->head, rte_memory_order_relaxed);
n_slots = ring_size - head + mq->last_tail;
 
while (n_slots--) {
@@ -493,7 +493,7 @@ struct mp_region_msg {
d0 = &ring->desc[s0];
d0->length = pmd->run.pkt_buffer_size;
}
-   __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&ring->head, head, rte_memory_order_release);
}
 
mq->n_pkts += n_rx_pkts;
@@ -541,7 +541,7 @@ struct mp_region_msg {
 * threads, so using load-acquire pairs with store-release
 * to synchronize it between threads.
 */
-   last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
+   last_slot = rte_atomic_load_explicit(&ring->tail, rte_memory_order_acquire);
if (cur_slot == last_slot)
goto refill;
n_slots = last_slot - cur_slot;
@@ -591,7 +591,7 @@ struct mp_region_msg {
 * is called in the context of receiver thread. The loads in
 * the receiver do not need to synchronize with its own stores.
 */
-   head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
+   head = rte_atomic_load_explicit(&ring->head, rte_memory_order_relaxed);
n_slots = ring_size - head + mq->last_tail;
 
if (n_slots < 32)
@@ -620,7 +620,7 @@ struct mp_region_msg {
 * threads, so using store-release pairs with load-acquire
 * in function eth_memif_tx.
 */
-   __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&ring->head, head, rte_memory_order_release);
 
mq-
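
The memif comments above describe the load-acquire/store-release pairing on
the ring head and tail: the producer fills a slot before publishing the new
head with a release store, and the consumer's acquire load of head guarantees
it sees those slot writes. A minimal sketch of that pairing with a
hypothetical single-producer/single-consumer ring (demo_ring), not the memif
descriptor layout itself:

#include <stdint.h>
#include <rte_stdatomic.h>

struct demo_ring {
	RTE_ATOMIC(uint16_t) head;
	RTE_ATOMIC(uint16_t) tail;
	uint64_t desc[256];
};

static void
demo_produce(struct demo_ring *r, uint64_t d)
{
	uint16_t h = rte_atomic_load_explicit(&r->head, rte_memory_order_relaxed);

	r->desc[h & 255] = d;                      /* fill the slot first */
	rte_atomic_store_explicit(&r->head, h + 1, /* then publish it */
	    rte_memory_order_release);
}

static int
demo_consume(struct demo_ring *r, uint64_t *d)
{
	uint16_t t = rte_atomic_load_explicit(&r->tail, rte_memory_order_relaxed);
	uint16_t h = rte_atomic_load_explicit(&r->head, rte_memory_order_acquire);

	if (t == h)
		return 0;                          /* ring is empty */
	*d = r->desc[t & 255];
	rte_atomic_store_explicit(&r->tail, t + 1, rte_memory_order_release);
	return 1;
}
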

[PATCH v4 15/45] net/thunderx: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/thunderx/nicvf_rxtx.c   | 9 +
 drivers/net/thunderx/nicvf_struct.h | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c
index 74f43b9..76b6fdb 100644
--- a/drivers/net/thunderx/nicvf_rxtx.c
+++ b/drivers/net/thunderx/nicvf_rxtx.c
@@ -374,8 +374,8 @@ static const alignas(RTE_CACHE_LINE_SIZE) uint32_t ptype_table[16][16] = {
NICVF_RX_ASSERT((unsigned int)to_fill <= (qlen_mask -
(nicvf_addr_read(rbdr->rbdr_status) & NICVF_RBDR_COUNT_MASK)));
 
-   next_tail = __atomic_fetch_add(&rbdr->next_tail, to_fill,
-   __ATOMIC_ACQUIRE);
+   next_tail = rte_atomic_fetch_add_explicit(&rbdr->next_tail, to_fill,
+   rte_memory_order_acquire);
ltail = next_tail;
for (i = 0; i < to_fill; i++) {
struct rbdr_entry_t *entry = desc + (ltail & qlen_mask);
@@ -385,9 +385,10 @@ static const alignas(RTE_CACHE_LINE_SIZE) uint32_t ptype_table[16][16] = {
ltail++;
}
 
-   rte_wait_until_equal_32(&rbdr->tail, next_tail, __ATOMIC_RELAXED);
+   rte_wait_until_equal_32((uint32_t *)(uintptr_t)&rbdr->tail, next_tail,
+   rte_memory_order_relaxed);
 
-   __atomic_store_n(&rbdr->tail, ltail, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&rbdr->tail, ltail, rte_memory_order_release);
nicvf_addr_write(door, to_fill);
return to_fill;
 }
diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h
index cfcd942..60d3ec0 100644
--- a/drivers/net/thunderx/nicvf_struct.h
+++ b/drivers/net/thunderx/nicvf_struct.h
@@ -20,8 +20,8 @@ struct __rte_cache_aligned nicvf_rbdr {
struct rbdr_entry_t *desc;
nicvf_iova_addr_t phys;
uint32_t buffsz;
-   uint32_t tail;
-   uint32_t next_tail;
+   RTE_ATOMIC(uint32_t) tail;
+   RTE_ATOMIC(uint32_t) next_tail;
uint32_t head;
uint32_t qlen_mask;
 };
-- 
1.8.3.1
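
The thunderx hunk also shows the cast needed when an RTE_ATOMIC()-qualified
field is handed to an API that takes a plain pointer, such as
rte_wait_until_equal_32(): routing the cast through uintptr_t strips the
atomic qualifier without pointer-compatibility warnings on both the C11 and
builtin code paths. A hypothetical sketch (demo_rbdr is invented):

#include <stdint.h>
#include <rte_pause.h>
#include <rte_stdatomic.h>

struct demo_rbdr {
	RTE_ATOMIC(uint32_t) tail;
};

static void
demo_wait_tail(struct demo_rbdr *q, uint32_t expected)
{
	/* Spin until another agent advances tail to the expected value. */
	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&q->tail, expected,
	    rte_memory_order_relaxed);
}
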



[PATCH v4 16/45] net/virtio: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/virtio/virtio_ring.h |  4 +--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 12 -
 drivers/net/virtio/virtqueue.h   | 32 
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index e848c0b..2a25751 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -59,7 +59,7 @@ struct vring_used_elem {
 
 struct vring_used {
uint16_t flags;
-   uint16_t idx;
+   RTE_ATOMIC(uint16_t) idx;
struct vring_used_elem ring[];
 };
 
@@ -70,7 +70,7 @@ struct vring_packed_desc {
uint64_t addr;
uint32_t len;
uint16_t id;
-   uint16_t flags;
+   RTE_ATOMIC(uint16_t) flags;
 };
 
 #define RING_EVENT_FLAGS_ENABLE 0x0
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 4fdfe70..24e2b2c 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -948,7 +948,7 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
 static inline int
 desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
 {
-   uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
+   uint16_t flags = rte_atomic_load_explicit(&desc->flags, rte_memory_order_acquire);
 
return wrap_counter == !!(flags & VRING_PACKED_DESC_F_AVAIL) &&
wrap_counter != !!(flags & VRING_PACKED_DESC_F_USED);
@@ -1037,8 +1037,8 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
if (vq->used_wrap_counter)
flags |= VRING_PACKED_DESC_F_AVAIL_USED;
 
-   __atomic_store_n(&vring->desc[vq->used_idx].flags, flags,
-__ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&vring->desc[vq->used_idx].flags, flags,
+rte_memory_order_release);
 
vq->used_idx += n_descs;
if (vq->used_idx >= dev->queue_size) {
@@ -1057,9 +1057,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
struct vring *vring = &dev->vrings.split[queue_idx];
 
/* Consume avail ring, using used ring idx as first one */
-   while (__atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
+   while (rte_atomic_load_explicit(&vring->used->idx, rte_memory_order_relaxed)
   != vring->avail->idx) {
-   avail_idx = __atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
+   avail_idx = rte_atomic_load_explicit(&vring->used->idx, rte_memory_order_relaxed)
& (vring->num - 1);
desc_idx = vring->avail->ring[avail_idx];
 
@@ -1070,7 +1070,7 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
uep->id = desc_idx;
uep->len = n_descs;
 
-   __atomic_fetch_add(&vring->used->idx, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&vring->used->idx, 1, rte_memory_order_relaxed);
}
 }
 
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 75d70f1..60211a4 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -37,7 +37,7 @@
 virtio_mb(uint8_t weak_barriers)
 {
if (weak_barriers)
-   rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+   rte_atomic_thread_fence(rte_memory_order_seq_cst);
else
rte_mb();
 }
@@ -46,7 +46,7 @@
 virtio_rmb(uint8_t weak_barriers)
 {
if (weak_barriers)
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
else
rte_io_rmb();
 }
@@ -55,7 +55,7 @@
 virtio_wmb(uint8_t weak_barriers)
 {
if (weak_barriers)
-   rte_atomic_thread_fence(__ATOMIC_RELEASE);
+   rte_atomic_thread_fence(rte_memory_order_release);
else
rte_io_wmb();
 }
@@ -67,12 +67,12 @@
uint16_t flags;
 
if (weak_barriers) {
-/* x86 prefers to using rte_io_rmb over __atomic_load_n as it reports
+/* x86 prefers to using rte_io_rmb over rte_atomic_load_explicit as it reports
  * a better perf(~1.5%), which comes from the saved branch by the compiler.
  * The if and else branch are identical  on the platforms except Arm.
  */
 #ifdef RTE_ARCH_ARM
-   flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
+   flags = rte_atomic_load_explicit(&dp->flags, rte_memory_order_acquire);
 #else
flags = dp->flags;
rte_io_rmb();
@@ -90,12 +90,12 @@
  uint16_t flags, uint8_t wea

[PATCH v4 17/45] net/hinic: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/hinic/hinic_pmd_rx.c | 2 +-
 drivers/net/hinic/hinic_pmd_rx.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hinic/hinic_pmd_rx.c b/drivers/net/hinic/hinic_pmd_rx.c
index 7adb6e3..c2cd295 100644
--- a/drivers/net/hinic/hinic_pmd_rx.c
+++ b/drivers/net/hinic/hinic_pmd_rx.c
@@ -1004,7 +1004,7 @@ u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
while (pkts < nb_pkts) {
 /* 2. current ci is done */
rx_cqe = &rxq->rx_cqe[sw_ci];
-   status = __atomic_load_n(&rx_cqe->status, __ATOMIC_ACQUIRE);
+   status = rte_atomic_load_explicit(&rx_cqe->status, rte_memory_order_acquire);
if (!HINIC_GET_RX_DONE_BE(status))
break;
 
diff --git a/drivers/net/hinic/hinic_pmd_rx.h b/drivers/net/hinic/hinic_pmd_rx.h
index 2dde3ec..43c236b 100644
--- a/drivers/net/hinic/hinic_pmd_rx.h
+++ b/drivers/net/hinic/hinic_pmd_rx.h
@@ -33,7 +33,7 @@ struct __rte_cache_aligned hinic_rq_cqe {
 #else
 struct hinic_rq_cqe {
 #endif
-   u32 status;
+   RTE_ATOMIC(u32) status;
u32 vlan_len;
u32 offload_type;
u32 rss_hash;
-- 
1.8.3.1



[PATCH v4 18/45] net/idpf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/idpf/idpf_ethdev.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index 86151c9..1df4d6b 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -259,8 +259,8 @@ struct rte_idpf_xstats_name_off {
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxq = dev->data->rx_queues[i];
-   mbuf_alloc_failed += __atomic_load_n(&rxq->rx_stats.mbuf_alloc_failed,
-__ATOMIC_RELAXED);
+   mbuf_alloc_failed += rte_atomic_load_explicit(&rxq->rx_stats.mbuf_alloc_failed,
+rte_memory_order_relaxed);
}
 
return mbuf_alloc_failed;
@@ -308,7 +308,8 @@ struct rte_idpf_xstats_name_off {
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxq = dev->data->rx_queues[i];
-   __atomic_store_n(&rxq->rx_stats.mbuf_alloc_failed, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&rxq->rx_stats.mbuf_alloc_failed, 0,
+   rte_memory_order_relaxed);
}
 }
 
-- 
1.8.3.1



[PATCH v4 19/45] net/qede: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/qede/base/bcm_osal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2edeb38..abd1186 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -51,11 +51,11 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
-static uint32_t ref_cnt;
+static RTE_ATOMIC(uint32_t) ref_cnt;
 
 int ecore_mz_mapping_alloc(void)
 {
-   if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+   if (rte_atomic_fetch_add_explicit(&ref_cnt, 1, rte_memory_order_relaxed) == 0) {
ecore_mz_mapping = rte_calloc("ecore_mz_map",
rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
}
@@ -68,7 +68,7 @@ int ecore_mz_mapping_alloc(void)
 
 void ecore_mz_mapping_free(void)
 {
-   if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+   if (rte_atomic_fetch_sub_explicit(&ref_cnt, 1, rte_memory_order_relaxed) - 1 == 0) {
rte_free(ecore_mz_mapping);
ecore_mz_mapping = NULL;
}
-- 
1.8.3.1



[PATCH v4 20/45] net/ring: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/ring/rte_eth_ring.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 48953dd..b16f5d5 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
struct rte_ring *rng;
-   uint64_t rx_pkts;
-   uint64_t tx_pkts;
+   RTE_ATOMIC(uint64_t) rx_pkts;
+   RTE_ATOMIC(uint64_t) tx_pkts;
 };
 
 struct pmd_internals {
@@ -82,7 +82,7 @@ struct pmd_internals {
if (r->rng->flags & RING_F_SC_DEQ)
r->rx_pkts += nb_rx;
else
-   __atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&r->rx_pkts, nb_rx, rte_memory_order_relaxed);
return nb_rx;
 }
 
@@ -96,7 +96,7 @@ struct pmd_internals {
if (r->rng->flags & RING_F_SP_ENQ)
r->tx_pkts += nb_tx;
else
-   __atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&r->tx_pkts, nb_tx, rte_memory_order_relaxed);
return nb_tx;
 }
 
-- 
1.8.3.1



[PATCH v4 21/45] vdpa/mlx5: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c | 24 +-
 drivers/vdpa/mlx5/mlx5_vdpa.h | 14 +--
 drivers/vdpa/mlx5/mlx5_vdpa_cthread.c | 46 +--
 drivers/vdpa/mlx5/mlx5_vdpa_lm.c  |  4 ++-
 drivers/vdpa/mlx5/mlx5_vdpa_mem.c |  4 ++-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c   |  4 ++-
 6 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index f900384..98c39a5 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -261,8 +261,8 @@
uint32_t timeout = 0;
 
/* Check and wait all close tasks done. */
-   while (__atomic_load_n(&priv->dev_close_progress,
-   __ATOMIC_RELAXED) != 0 && timeout < 1000) {
+   while (rte_atomic_load_explicit(&priv->dev_close_progress,
+   rte_memory_order_relaxed) != 0 && timeout < 1000) {
rte_delay_us_sleep(1);
timeout++;
}
@@ -294,8 +294,8 @@
priv->last_c_thrd_idx = 0;
else
priv->last_c_thrd_idx++;
-   __atomic_store_n(&priv->dev_close_progress,
-   1, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&priv->dev_close_progress,
+   1, rte_memory_order_relaxed);
if (mlx5_vdpa_task_add(priv,
priv->last_c_thrd_idx,
MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
@@ -319,8 +319,8 @@
if (!priv->connected)
mlx5_vdpa_dev_cache_clean(priv);
priv->vid = 0;
-   __atomic_store_n(&priv->dev_close_progress, 0,
-   __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&priv->dev_close_progress, 0,
+   rte_memory_order_relaxed);
priv->state = MLX5_VDPA_STATE_PROBED;
DRV_LOG(INFO, "vDPA device %d was closed.", vid);
return ret;
@@ -664,7 +664,9 @@
 static int
 mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
 {
-   uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0;
+   RTE_ATOMIC(uint32_t) remaining_cnt = 0;
+   RTE_ATOMIC(uint32_t) err_cnt = 0;
+   uint32_t task_num = 0;
uint32_t max_queues, index, thrd_idx, data[1];
struct mlx5_vdpa_virtq *virtq;
 
@@ -847,8 +849,8 @@
if (conf_thread_mng.initializer_priv == priv)
if (mlx5_vdpa_mult_threads_create())
goto error;
-   __atomic_fetch_add(&conf_thread_mng.refcnt, 1,
-   __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&conf_thread_mng.refcnt, 1,
+   rte_memory_order_relaxed);
}
if (mlx5_vdpa_create_dev_resources(priv))
goto error;
@@ -937,8 +939,8 @@
if (priv->vdev)
rte_vdpa_unregister_device(priv->vdev);
if (priv->use_c_thread)
-   if (__atomic_fetch_sub(&conf_thread_mng.refcnt,
-   1, __ATOMIC_RELAXED) == 1)
+   if (rte_atomic_fetch_sub_explicit(&conf_thread_mng.refcnt,
+   1, rte_memory_order_relaxed) == 1)
mlx5_vdpa_mult_threads_destroy(true);
rte_free(priv);
 }
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 4ce6977..e156520 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -93,8 +93,8 @@ enum mlx5_vdpa_task_type {
 struct __rte_aligned(4) mlx5_vdpa_task {
struct mlx5_vdpa_priv *priv;
enum mlx5_vdpa_task_type type;
-   uint32_t *remaining_cnt;
-   uint32_t *err_cnt;
+   RTE_ATOMIC(uint32_t) *remaining_cnt;
+   RTE_ATOMIC(uint32_t) *err_cnt;
uint32_t idx;
 } __rte_packed;
 
@@ -107,7 +107,7 @@ struct mlx5_vdpa_c_thread {
 
 struct mlx5_vdpa_conf_thread_mng {
void *initializer_priv;
-   uint32_t refcnt;
+   RTE_ATOMIC(uint32_t) refcnt;
uint32_t max_thrds;
pthread_mutex_t cthrd_lock;
struct mlx5_vdpa_c_thread cthrd[MLX5_VDPA_MAX_C_THRD];
@@ -212,7 +212,7 @@ struct mlx5_vdpa_priv {
uint64_t features; /* Negotiated features. */
uint16_t log_max_rqt_size;
uint16_t last_c_thrd_idx;
-   uint16_t dev_close_progress;
+   RTE_ATOMIC(uint16_t) dev_close_progress;
uint16_t num_mrs; /* Number of memory regions. */
struct mlx5_vdpa_steer steer;
struct mlx5dv_var *var;
@@ -581,13 +581,13 @@ int mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base,
 mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv,
uint32_t thrd_idx,
enum mlx5_vdpa_task_type task_type,
-   uint32_t *remaining_cnt, 

[PATCH v4 22/45] raw/ifpga: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/raw/ifpga/ifpga_rawdev.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/raw/ifpga/ifpga_rawdev.c b/drivers/raw/ifpga/ifpga_rawdev.c
index f89bd3f..78d3c88 100644
--- a/drivers/raw/ifpga/ifpga_rawdev.c
+++ b/drivers/raw/ifpga/ifpga_rawdev.c
@@ -73,7 +73,7 @@
 
 static struct ifpga_rawdev ifpga_rawdevices[IFPGA_RAWDEV_NUM];
 
-static int ifpga_monitor_refcnt;
+static RTE_ATOMIC(int) ifpga_monitor_refcnt;
 static rte_thread_t ifpga_monitor_start_thread;
 
 static struct ifpga_rawdev *
@@ -512,7 +512,7 @@ static int set_surprise_link_check_aer(
int gsd_enable, ret;
 #define MS 1000
 
-   while (__atomic_load_n(&ifpga_monitor_refcnt, __ATOMIC_RELAXED)) {
+   while (rte_atomic_load_explicit(&ifpga_monitor_refcnt, rte_memory_order_relaxed)) {
gsd_enable = 0;
for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
ifpga_rdev = &ifpga_rawdevices[i];
@@ -549,7 +549,7 @@ static int set_surprise_link_check_aer(
 
dev->poll_enabled = 1;
 
-   if (!__atomic_fetch_add(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED)) {
+   if (!rte_atomic_fetch_add_explicit(&ifpga_monitor_refcnt, 1, rte_memory_order_relaxed)) {
ret = rte_thread_create_internal_control(&ifpga_monitor_start_thread,
"ifpga-mon", ifpga_rawdev_gsd_handle, NULL);
if (ret != 0) {
@@ -573,7 +573,8 @@ static int set_surprise_link_check_aer(
 
dev->poll_enabled = 0;
 
-   if (!(__atomic_fetch_sub(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED) - 1) &&
+   if (!(rte_atomic_fetch_sub_explicit(&ifpga_monitor_refcnt, 1,
+   rte_memory_order_relaxed) - 1) &&
ifpga_monitor_start_thread.opaque_id != 0) {
ret = pthread_cancel((pthread_t)ifpga_monitor_start_thread.opaque_id);
if (ret)
-- 
1.8.3.1
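
The monitor start/stop logic above is a first-reference/last-reference
pattern: fetch-add returns the previous count, so zero means "first user,
start the thread", and fetch-sub "- 1 == 0" means "last user, stop it". A
self-contained sketch with hypothetical helpers (demo_thread_start and
demo_thread_stop are assumed, not from the driver):

#include <rte_stdatomic.h>

static RTE_ATOMIC(int) demo_refcnt;

static int demo_thread_start(void);  /* assumed elsewhere */
static void demo_thread_stop(void);  /* assumed elsewhere */

static int
demo_enable(void)
{
	/* Previous value 0 means we are the first reference. */
	if (!rte_atomic_fetch_add_explicit(&demo_refcnt, 1,
	    rte_memory_order_relaxed))
		return demo_thread_start();
	return 0;
}

static void
demo_disable(void)
{
	/* Result 0 after the decrement means we were the last reference. */
	if (!(rte_atomic_fetch_sub_explicit(&demo_refcnt, 1,
	    rte_memory_order_relaxed) - 1))
		demo_thread_stop();
}
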



[PATCH v4 23/45] event/opdl: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/event/opdl/opdl_ring.c | 80 +-
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c
index e87ffd5..3476f6b 100644
--- a/drivers/event/opdl/opdl_ring.c
+++ b/drivers/event/opdl/opdl_ring.c
@@ -47,12 +47,12 @@ struct __rte_cache_aligned shared_state {
/* Last known minimum sequence number of dependencies, used for multi
 * thread operation
 */
-   uint32_t available_seq;
+   RTE_ATOMIC(uint32_t) available_seq;
char _pad1[RTE_CACHE_LINE_SIZE * 3];
-   uint32_t head;  /* Head sequence number (for multi thread operation) */
+   RTE_ATOMIC(uint32_t) head;  /* Head sequence number (for multi thread operation) */
char _pad2[RTE_CACHE_LINE_SIZE * 3];
struct opdl_stage *stage;  /* back pointer */
-   uint32_t tail;  /* Tail sequence number */
+   RTE_ATOMIC(uint32_t) tail;  /* Tail sequence number */
char _pad3[RTE_CACHE_LINE_SIZE * 2];
 };
 
@@ -149,10 +149,10 @@ struct opdl_ring {
 available(const struct opdl_stage *s)
 {
if (s->threadsafe == true) {
-   uint32_t n = __atomic_load_n(&s->shared.available_seq,
-   __ATOMIC_ACQUIRE) -
-   __atomic_load_n(&s->shared.head,
-   __ATOMIC_ACQUIRE);
+   uint32_t n = rte_atomic_load_explicit(&s->shared.available_seq,
+   rte_memory_order_acquire) -
+   rte_atomic_load_explicit(&s->shared.head,
+   rte_memory_order_acquire);
 
/* Return 0 if available_seq needs to be updated */
return (n <= s->num_slots) ? n : 0;
@@ -168,7 +168,7 @@ struct opdl_ring {
 {
uint32_t i;
uint32_t this_tail = s->shared.tail;
-   uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE);
+   uint32_t min_seq = rte_atomic_load_explicit(&s->deps[0]->tail, rte_memory_order_acquire);
/* Input stage sequence numbers are greater than the sequence numbers of
 * its dependencies so an offset of t->num_slots is needed when
 * calculating available slots and also the condition which is used to
@@ -179,16 +179,16 @@ struct opdl_ring {
if (is_input_stage(s)) {
wrap = s->num_slots;
for (i = 1; i < s->num_deps; i++) {
-   uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-   __ATOMIC_ACQUIRE);
+   uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
+   rte_memory_order_acquire);
if ((this_tail - seq) > (this_tail - min_seq))
min_seq = seq;
}
} else {
wrap = 0;
for (i = 1; i < s->num_deps; i++) {
-   uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-   __ATOMIC_ACQUIRE);
+   uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
+   rte_memory_order_acquire);
if ((seq - this_tail) < (min_seq - this_tail))
min_seq = seq;
}
@@ -197,8 +197,8 @@ struct opdl_ring {
if (s->threadsafe == false)
s->available_seq = min_seq + wrap;
else
-   __atomic_store_n(&s->shared.available_seq, min_seq + wrap,
-   __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&s->shared.available_seq, min_seq + wrap,
+   rte_memory_order_release);
 }
 
 /* Wait until the number of available slots reaches number requested */
@@ -298,7 +298,7 @@ struct opdl_ring {
copy_entries_in(t, head, entries, num_entries);
 
s->head += num_entries;
-   __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);
 
return num_entries;
 }
@@ -381,18 +381,18 @@ struct opdl_ring {
/* There should be no race condition here. If shared.tail
 * matches, no other core can update it until this one does.
 */
-   if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) ==
+   if (rte_atomic_load_explicit(&s->shared.tail, rte_memory_order_acquire) ==
tail) {
if (num_entries >= (head - tail)) {
claim_mgr_remove(disclaims);
-   __atomic_store_n(

[PATCH v4 25/45] event/dsw: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
Reviewed-by: Mattias Rönnblom 
---
 drivers/event/dsw/dsw_evdev.h  |  6 +++---
 drivers/event/dsw/dsw_event.c  | 47 +++---
 drivers/event/dsw/dsw_xstats.c |  4 ++--
 3 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 3a5989f..2018306 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -227,9 +227,9 @@ struct __rte_cache_aligned dsw_port {
alignas(RTE_CACHE_LINE_SIZE) struct rte_ring *ctl_in_ring;
 
/* Estimate of current port load. */
-   alignas(RTE_CACHE_LINE_SIZE) int16_t load;
+   alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int16_t) load;
/* Estimate of flows currently migrating to this port. */
-   alignas(RTE_CACHE_LINE_SIZE) int32_t immigration_load;
+   alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) immigration_load;
 };
 
 struct dsw_queue {
@@ -252,7 +252,7 @@ struct dsw_evdev {
uint8_t num_queues;
int32_t max_inflight;
 
-   alignas(RTE_CACHE_LINE_SIZE) int32_t credits_on_loan;
+   alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
 };
 
 #define DSW_CTL_PAUS_REQ (0)
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 23488d9..70c3c3a 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -33,7 +33,8 @@
}
 
total_on_loan =
-   __atomic_load_n(&dsw->credits_on_loan, __ATOMIC_RELAXED);
+   rte_atomic_load_explicit(&dsw->credits_on_loan,
+rte_memory_order_relaxed);
available = dsw->max_inflight - total_on_loan;
acquired_credits = RTE_MAX(missing_credits, DSW_PORT_MIN_CREDITS);
 
@@ -45,13 +46,16 @@
 * allocation.
 */
new_total_on_loan =
-   __atomic_fetch_add(&dsw->credits_on_loan, acquired_credits,
-  __ATOMIC_RELAXED) + acquired_credits;
+   rte_atomic_fetch_add_explicit(&dsw->credits_on_loan,
+ acquired_credits,
+ rte_memory_order_relaxed) +
+ acquired_credits;
 
if (unlikely(new_total_on_loan > dsw->max_inflight)) {
/* Some other port took the last credits */
-   __atomic_fetch_sub(&dsw->credits_on_loan, acquired_credits,
-  __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(&dsw->credits_on_loan,
+ acquired_credits,
+ rte_memory_order_relaxed);
return false;
}
 
@@ -77,8 +81,9 @@
 
port->inflight_credits = leave_credits;
 
-   __atomic_fetch_sub(&dsw->credits_on_loan, return_credits,
-  __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(&dsw->credits_on_loan,
+ return_credits,
+ rte_memory_order_relaxed);
 
DSW_LOG_DP_PORT(DEBUG, port->id,
"Returned %d tokens to pool.\n",
@@ -156,19 +161,22 @@
int16_t period_load;
int16_t new_load;
 
-   old_load = __atomic_load_n(&port->load, __ATOMIC_RELAXED);
+   old_load = rte_atomic_load_explicit(&port->load,
+   rte_memory_order_relaxed);
 
period_load = dsw_port_load_close_period(port, now);
 
new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
(DSW_OLD_LOAD_WEIGHT+1);
 
-   __atomic_store_n(&port->load, new_load, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&port->load, new_load,
+ rte_memory_order_relaxed);
 
/* The load of the recently immigrated flows should hopefully
 * be reflected the load estimate by now.
 */
-   __atomic_store_n(&port->immigration_load, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&port->immigration_load, 0,
+ rte_memory_order_relaxed);
 }
 
 static void
@@ -390,10 +398,11 @@ struct dsw_queue_flow_burst {
 
for (i = 0; i < dsw->num_ports; i++) {
int16_t measured_load =
-   __atomic_load_n(&dsw->ports[i].load, __ATOMIC_RELAXED);
+   rte_atomic_load_explicit(&dsw->ports[i].load,
+rte_memory_order_relaxed);
int32_t immigration_load =
-   __atomic_load_n(&dsw->ports[i].immigration_load,
-   __ATOMIC_RELAXED);
+   

[PATCH v4 24/45] event/octeontx: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/event/octeontx/timvf_evdev.h  |  8 
 drivers/event/octeontx/timvf_worker.h | 36 +--
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/event/octeontx/timvf_evdev.h b/drivers/event/octeontx/timvf_evdev.h
index e7a63e4..3a2dc47 100644
--- a/drivers/event/octeontx/timvf_evdev.h
+++ b/drivers/event/octeontx/timvf_evdev.h
@@ -126,15 +126,15 @@ enum timvf_clk_src {
 struct __rte_aligned(8) tim_mem_bucket {
uint64_t first_chunk;
union {
-   uint64_t w1;
+   RTE_ATOMIC(uint64_t) w1;
struct {
-   uint32_t nb_entry;
+   RTE_ATOMIC(uint32_t) nb_entry;
uint8_t sbt:1;
uint8_t hbt:1;
uint8_t bsk:1;
uint8_t rsvd:5;
-   uint8_t lock;
-   int16_t chunk_remainder;
+   RTE_ATOMIC(uint8_t) lock;
+   RTE_ATOMIC(int16_t) chunk_remainder;
};
};
uint64_t current_chunk;
diff --git a/drivers/event/octeontx/timvf_worker.h b/drivers/event/octeontx/timvf_worker.h
index e4b923e..de9f1b0 100644
--- a/drivers/event/octeontx/timvf_worker.h
+++ b/drivers/event/octeontx/timvf_worker.h
@@ -19,22 +19,22 @@
 static inline int16_t
 timr_bkt_get_rem(struct tim_mem_bucket *bktp)
 {
-   return __atomic_load_n(&bktp->chunk_remainder,
-   __ATOMIC_ACQUIRE);
+   return rte_atomic_load_explicit(&bktp->chunk_remainder,
+   rte_memory_order_acquire);
 }
 
 static inline void
 timr_bkt_set_rem(struct tim_mem_bucket *bktp, uint16_t v)
 {
-   __atomic_store_n(&bktp->chunk_remainder, v,
-   __ATOMIC_RELEASE);
+   rte_atomic_store_explicit(&bktp->chunk_remainder, v,
+   rte_memory_order_release);
 }
 
 static inline void
 timr_bkt_sub_rem(struct tim_mem_bucket *bktp, uint16_t v)
 {
-   __atomic_fetch_sub(&bktp->chunk_remainder, v,
-   __ATOMIC_RELEASE);
+   rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v,
+   rte_memory_order_release);
 }
 
 static inline uint8_t
@@ -47,14 +47,14 @@
 timr_bkt_set_sbt(struct tim_mem_bucket *bktp)
 {
const uint64_t v = TIM_BUCKET_W1_M_SBT << TIM_BUCKET_W1_S_SBT;
-   return __atomic_fetch_or(&bktp->w1, v, __ATOMIC_ACQ_REL);
+   return rte_atomic_fetch_or_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }
 
 static inline uint64_t
 timr_bkt_clr_sbt(struct tim_mem_bucket *bktp)
 {
const uint64_t v = ~(TIM_BUCKET_W1_M_SBT << TIM_BUCKET_W1_S_SBT);
-   return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+   return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }
 
 static inline uint8_t
@@ -81,34 +81,34 @@
 {
/*Clear everything except lock. */
const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;
-   return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+   return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }
 
 static inline uint64_t
 timr_bkt_fetch_sema_lock(struct tim_mem_bucket *bktp)
 {
-   return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-   __ATOMIC_ACQ_REL);
+   return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+   rte_memory_order_acq_rel);
 }
 
 static inline uint64_t
 timr_bkt_fetch_sema(struct tim_mem_bucket *bktp)
 {
-   return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA,
-   __ATOMIC_RELAXED);
+   return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA,
+   rte_memory_order_relaxed);
 }
 
 static inline uint64_t
 timr_bkt_inc_lock(struct tim_mem_bucket *bktp)
 {
const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;
-   return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQ_REL);
+   return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }
 
 static inline void
 timr_bkt_dec_lock(struct tim_mem_bucket *bktp)
 {
-   __atomic_fetch_add(&bktp->lock, 0xff, __ATOMIC_ACQ_REL);
+   rte_atomic_fetch_add_explicit(&bktp->lock, 0xff, rte_memory_order_acq_rel);
 }
 
 static inline uint32_t
@@ -121,13 +121,13 @@
 static inline void
 timr_bkt_inc_nent(struct tim_mem_bucket *bktp)
 {
-   __atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }
 
 static inline void
 timr_bkt_add_nent(struct tim_mem_bucket *bktp, uint32_t v)
 {
-   __atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&bk

[PATCH v4 26/45] dma/skeleton: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/dma/skeleton/skeleton_dmadev.c | 5 +++--
 drivers/dma/skeleton/skeleton_dmadev.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/skeleton/skeleton_dmadev.c b/drivers/dma/skeleton/skeleton_dmadev.c
index 48f88f9..926c188 100644
--- a/drivers/dma/skeleton/skeleton_dmadev.c
+++ b/drivers/dma/skeleton/skeleton_dmadev.c
@@ -142,7 +142,7 @@
else if (desc->op == SKELDMA_OP_FILL)
do_fill(desc);
 
-   __atomic_fetch_add(&hw->completed_count, 1, __ATOMIC_RELEASE);
+   rte_atomic_fetch_add_explicit(&hw->completed_count, 1, rte_memory_order_release);
(void)rte_ring_enqueue(hw->desc_completed, (void *)desc);
}
 
@@ -335,7 +335,8 @@
RTE_SET_USED(vchan);
 
*status = RTE_DMA_VCHAN_IDLE;
-   if (hw->submitted_count != __atomic_load_n(&hw->completed_count, __ATOMIC_ACQUIRE)
+   if (hw->submitted_count != rte_atomic_load_explicit(&hw->completed_count,
+   rte_memory_order_acquire)
|| hw->zero_req_count == 0)
*status = RTE_DMA_VCHAN_ACTIVE;
return 0;
diff --git a/drivers/dma/skeleton/skeleton_dmadev.h b/drivers/dma/skeleton/skeleton_dmadev.h
index cfd37d1..0365f64 100644
--- a/drivers/dma/skeleton/skeleton_dmadev.h
+++ b/drivers/dma/skeleton/skeleton_dmadev.h
@@ -81,7 +81,7 @@ struct skeldma_hw {
/* Cache delimiter for cpuwork thread's operation data */
alignas(RTE_CACHE_LINE_SIZE) char cache2;
volatile uint32_t zero_req_count;
-   uint64_t completed_count;
+   RTE_ATOMIC(uint64_t) completed_count;
 };
 
 #endif /* SKELETON_DMADEV_H */
-- 
1.8.3.1
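
The skeleton driver pairs a release fetch-add in the worker with an acquire
load in the status query, so descriptor writes made before the completion
count bump are visible to whoever observes the new count. A sketch under a
hypothetical struct (demo_hw, not the driver's skeldma_hw):

#include <stdint.h>
#include <rte_stdatomic.h>

struct demo_hw {
	uint64_t submitted_count;             /* touched by one thread only */
	RTE_ATOMIC(uint64_t) completed_count; /* shared with worker thread */
};

/* Worker thread, after finishing one descriptor: */
static void
demo_complete_one(struct demo_hw *hw)
{
	rte_atomic_fetch_add_explicit(&hw->completed_count, 1,
	    rte_memory_order_release);
}

/* Any thread querying channel state: */
static int
demo_is_active(struct demo_hw *hw)
{
	return hw->submitted_count !=
	    rte_atomic_load_explicit(&hw->completed_count,
		rte_memory_order_acquire);
}
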



[PATCH v4 27/45] crypto/octeontx: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/crypto/octeontx/otx_cryptodev_ops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/octeontx/otx_cryptodev_ops.c b/drivers/crypto/octeontx/otx_cryptodev_ops.c
index 947e1be..bafd0c1 100644
--- a/drivers/crypto/octeontx/otx_cryptodev_ops.c
+++ b/drivers/crypto/octeontx/otx_cryptodev_ops.c
@@ -652,7 +652,7 @@
if (!rsp_info->sched_type)
ssows_head_wait(ws);
 
-   rte_atomic_thread_fence(__ATOMIC_RELEASE);
+   rte_atomic_thread_fence(rte_memory_order_release);
ssovf_store_pair(add_work, req, ws->grps[rsp_info->queue_id]);
 }
 
@@ -896,7 +896,7 @@
pcount = pending_queue_level(pqueue, DEFAULT_CMD_QLEN);
 
/* Ensure pcount isn't read before data lands */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
count = (nb_ops > pcount) ? pcount : nb_ops;
 
-- 
1.8.3.1
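
The two fences above are the classic publish/consume pair expressed without a
single atomic object: release before handing work off, acquire before
trusting a count read back. A hypothetical sketch, where demo_publish() and
demo_pending() merely stand in for the driver's store and level reads:

#include <rte_atomic.h>

extern void demo_publish(void *req);   /* assumed: makes req visible to HW */
extern unsigned int demo_pending(void); /* assumed: reads a completion count */

static void
demo_submit(void *req)
{
	/* All prior writes to *req happen-before the publish. */
	rte_atomic_thread_fence(rte_memory_order_release);
	demo_publish(req);
}

static unsigned int
demo_poll(void)
{
	unsigned int n = demo_pending();

	/* Ensure n is not consumed before the completed data lands. */
	rte_atomic_thread_fence(rte_memory_order_acquire);
	return n;
}
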



[PATCH v4 28/45] common/mlx5: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/common/mlx5/linux/mlx5_nl.c |  5 +--
 drivers/common/mlx5/mlx5_common.h   |  2 +-
 drivers/common/mlx5/mlx5_common_mr.c| 16 -
 drivers/common/mlx5/mlx5_common_mr.h|  2 +-
 drivers/common/mlx5/mlx5_common_utils.c | 32 +-
 drivers/common/mlx5/mlx5_common_utils.h |  6 ++--
 drivers/common/mlx5/mlx5_malloc.c   | 58 -
 7 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index 61192eb..a5ac4dc 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -175,10 +175,11 @@ struct mlx5_nl_port_info {
uint16_t state; /**< IB device port state (out). */
 };
 
-uint32_t atomic_sn;
+RTE_ATOMIC(uint32_t) atomic_sn;
 
 /* Generate Netlink sequence number. */
-#define MLX5_NL_SN_GENERATE (__atomic_fetch_add(&atomic_sn, 1, __ATOMIC_RELAXED) + 1)
+#define MLX5_NL_SN_GENERATE (rte_atomic_fetch_add_explicit(&atomic_sn, 1, \
+   rte_memory_order_relaxed) + 1)
 
 /**
  * Opens a Netlink socket.
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index 9c80277..14c70ed 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -195,7 +195,7 @@ enum mlx5_cqe_status {
/* Prevent speculative reading of other fields in CQE until
 * CQE is valid.
 */
-   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+   rte_atomic_thread_fence(rte_memory_order_acquire);
 
if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
 op_code == MLX5_CQE_REQ_ERR))
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 85ec10d..50922ad 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -35,7 +35,7 @@ struct mlx5_range {
 /** Memory region for a mempool. */
 struct mlx5_mempool_mr {
struct mlx5_pmd_mr pmd_mr;
-   uint32_t refcnt; /**< Number of mempools sharing this MR. */
+   RTE_ATOMIC(uint32_t) refcnt; /**< Number of mempools sharing this MR. */
 };
 
 /* Mempool registration. */
@@ -56,11 +56,11 @@ struct mlx5_mempool_reg {
 {
struct mlx5_mprq_buf *buf = opaque;
 
-   if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
+   if (rte_atomic_load_explicit(&buf->refcnt, rte_memory_order_relaxed) == 1) {
rte_mempool_put(buf->mp, buf);
-   } else if (unlikely(__atomic_fetch_sub(&buf->refcnt, 1,
-  __ATOMIC_RELAXED) - 1 == 0)) {
-   __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
+   } else if (unlikely(rte_atomic_fetch_sub_explicit(&buf->refcnt, 1,
+  rte_memory_order_relaxed) - 1 == 0)) {
+   rte_atomic_store_explicit(&buf->refcnt, 1, rte_memory_order_relaxed);
rte_mempool_put(buf->mp, buf);
}
 }
@@ -1650,7 +1650,7 @@ struct mlx5_mempool_get_extmem_data {
unsigned int i;
 
for (i = 0; i < mpr->mrs_n; i++)
-   __atomic_fetch_add(&mpr->mrs[i].refcnt, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&mpr->mrs[i].refcnt, 1, rte_memory_order_relaxed);
 }
 
 /**
@@ -1665,8 +1665,8 @@ struct mlx5_mempool_get_extmem_data {
bool ret = false;
 
for (i = 0; i < mpr->mrs_n; i++)
-   ret |= __atomic_fetch_sub(&mpr->mrs[i].refcnt, 1,
- __ATOMIC_RELAXED) - 1 == 0;
+   ret |= rte_atomic_fetch_sub_explicit(&mpr->mrs[i].refcnt, 1,
+ rte_memory_order_relaxed) - 1 == 0;
return ret;
 }
 
diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h
index aa10b68..a7f1042 100644
--- a/drivers/common/mlx5/mlx5_common_mr.h
+++ b/drivers/common/mlx5/mlx5_common_mr.h
@@ -93,7 +93,7 @@ struct mlx5_mr_share_cache {
 /* Multi-Packet RQ buffer header. */
 struct __rte_cache_aligned mlx5_mprq_buf {
struct rte_mempool *mp;
-   uint16_t refcnt; /* Atomically accessed refcnt. */
+   RTE_ATOMIC(uint16_t) refcnt; /* Atomically accessed refcnt. */
struct rte_mbuf_ext_shared_info shinfos[];
/*
 * Shared information per stride.
diff --git a/drivers/common/mlx5/mlx5_common_utils.c b/drivers/common/mlx5/mlx5_common_utils.c
index e69d068..4b95d35 100644
--- a/drivers/common/mlx5/mlx5_common_utils.c
+++ b/drivers/common/mlx5/mlx5_common_utils.c
@@ -81,14 +81,14 @@ struct mlx5_list *
while (entry != NULL) {
if (l_const->cb_match(l_const->ctx, entry, ctx) == 0) {
if (reuse) {
-   ret = __atomic_fetch_

[PATCH v4 29/45] common/idpf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/common/idpf/idpf_common_device.h  |  6 +++---
 drivers/common/idpf/idpf_common_rxtx.c| 14 --
 drivers/common/idpf/idpf_common_rxtx.h|  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 16 
 4 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_device.h b/drivers/common/idpf/idpf_common_device.h
index 3834c1f..bfa927a 100644
--- a/drivers/common/idpf/idpf_common_device.h
+++ b/drivers/common/idpf/idpf_common_device.h
@@ -48,7 +48,7 @@ struct idpf_adapter {
struct idpf_hw hw;
struct virtchnl2_version_info virtchnl_version;
struct virtchnl2_get_capabilities caps;
-   volatile uint32_t pend_cmd; /* pending command not finished */
+   volatile RTE_ATOMIC(uint32_t) pend_cmd; /* pending command not finished */
uint32_t cmd_retval; /* return value of the cmd response from cp */
uint8_t *mbx_resp; /* buffer to store the mailbox response from cp */
 
@@ -179,8 +179,8 @@ struct idpf_cmd_info {
 atomic_set_cmd(struct idpf_adapter *adapter, uint32_t ops)
 {
uint32_t op_unk = VIRTCHNL2_OP_UNKNOWN;
-   bool ret = __atomic_compare_exchange(&adapter->pend_cmd, &op_unk, &ops,
-   0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+   bool ret = rte_atomic_compare_exchange_strong_explicit(&adapter->pend_cmd, &op_unk, ops,
+   rte_memory_order_acquire, rte_memory_order_acquire);
 
if (!ret)
DRV_LOG(ERR, "There is incomplete cmd %d", adapter->pend_cmd);
diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index 83b131e..b09c58c 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -592,8 +592,8 @@
next_avail = 0;
rx_bufq->nb_rx_hold -= delta;
} else {
-   __atomic_fetch_add(&rx_bufq->rx_stats.mbuf_alloc_failed,
-  nb_desc - next_avail, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rx_bufq->rx_stats.mbuf_alloc_failed,
+  nb_desc - next_avail, rte_memory_order_relaxed);
		RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
		   rx_bufq->port_id, rx_bufq->queue_id);
return;
@@ -612,8 +612,8 @@
next_avail += nb_refill;
rx_bufq->nb_rx_hold -= nb_refill;
} else {
-   __atomic_fetch_add(&rx_bufq->rx_stats.mbuf_alloc_failed,
-  nb_desc - next_avail, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rx_bufq->rx_stats.mbuf_alloc_failed,
+  nb_desc - next_avail, rte_memory_order_relaxed);
		RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
		   rx_bufq->port_id, rx_bufq->queue_id);
}
@@ -1093,7 +1093,8 @@
 
nmb = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(nmb == NULL)) {
-   __atomic_fetch_add(&rxq->rx_stats.mbuf_alloc_failed, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_stats.mbuf_alloc_failed, 1,
+   rte_memory_order_relaxed);
RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
   "queue_id=%u", rxq->port_id, rxq->queue_id);
break;
@@ -1203,7 +1204,8 @@
 
nmb = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(!nmb)) {
-   __atomic_fetch_add(&rxq->rx_stats.mbuf_alloc_failed, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rxq->rx_stats.mbuf_alloc_failed, 1,
+   rte_memory_order_relaxed);
RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
   "queue_id=%u", rxq->port_id, rxq->queue_id);
break;
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index b49b1ed..ed1 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -97,7 +97,7 @@
 #define IDPF_RX_SPLIT_BUFQ2_ID 2
 
 struct idpf_rx_stats {
-   uint64_t mbuf_alloc_failed;
+   RTE_ATOMIC(uint64_t) mbuf_alloc_failed;
 };
 
 struct idpf_rx_queue {
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index f65e8d5..3b5e124 100

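One subtlety in the idpf conversion above: the gcc builtin
__atomic_compare_exchange() takes the desired value by pointer plus a weak
flag, while rte_atomic_compare_exchange_strong_explicit() takes the desired
value directly and is always strong. A sketch of the same claim-a-slot
pattern, with hypothetical names:

	#include <stdbool.h>
	#include <stdint.h>
	#include <rte_stdatomic.h>

	#define CMD_UNKNOWN 0u

	static RTE_ATOMIC(uint32_t) pend_cmd; /* hypothetical pending-command slot */

	/* Claim the slot: succeeds only if no command is pending. */
	static bool
	claim_cmd(uint32_t ops)
	{
		uint32_t expected = CMD_UNKNOWN;

		/* 'ops' is passed by value here; the builtin took '&ops' and a
		 * weak flag. On failure, 'expected' holds the observed value. */
		return rte_atomic_compare_exchange_strong_explicit(&pend_cmd,
				&expected, ops,
				rte_memory_order_acquire, rte_memory_order_acquire);
	}
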
[PATCH v4 30/45] common/iavf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/common/iavf/iavf_impl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/common/iavf/iavf_impl.c b/drivers/common/iavf/iavf_impl.c
index 8919b0e..c0ff301 100644
--- a/drivers/common/iavf/iavf_impl.c
+++ b/drivers/common/iavf/iavf_impl.c
@@ -18,7 +18,7 @@ enum iavf_status
u64 size,
u32 alignment)
 {
-   static uint64_t iavf_dma_memzone_id;
+   static RTE_ATOMIC(uint64_t) iavf_dma_memzone_id;
const struct rte_memzone *mz = NULL;
char z_name[RTE_MEMZONE_NAMESIZE];
 
@@ -26,7 +26,7 @@ enum iavf_status
return IAVF_ERR_PARAM;
 
snprintf(z_name, sizeof(z_name), "iavf_dma_%" PRIu64,
-   __atomic_fetch_add(&iavf_dma_memzone_id, 1, __ATOMIC_RELAXED));
+   rte_atomic_fetch_add_explicit(&iavf_dma_memzone_id, 1, 
rte_memory_order_relaxed));
mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY,
 RTE_MEMZONE_IOVA_CONTIG, alignment,
 RTE_PGSIZE_2M);
-- 
1.8.3.1


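The iavf change above is a pattern worth noting: a relaxed fetch-add is
enough when the counter only has to mint unique IDs and no other memory
accesses depend on its ordering. A sketch with hypothetical names:

	#include <stdio.h>
	#include <inttypes.h>
	#include <rte_stdatomic.h>

	static RTE_ATOMIC(uint64_t) zone_seq; /* hypothetical sequence counter */

	static void
	make_zone_name(char *buf, size_t len)
	{
		/* Relaxed is sufficient: only uniqueness matters, not ordering. */
		snprintf(buf, len, "demo_dma_%" PRIu64,
			 rte_atomic_fetch_add_explicit(&zone_seq, 1,
						       rte_memory_order_relaxed));
	}
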

[PATCH v4 31/45] baseband/acc: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/baseband/acc/rte_acc100_pmd.c | 36 +--
 drivers/baseband/acc/rte_vrb_pmd.c| 46 +++
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/drivers/baseband/acc/rte_acc100_pmd.c b/drivers/baseband/acc/rte_acc100_pmd.c
index 4f666e5..ee50b9c 100644
--- a/drivers/baseband/acc/rte_acc100_pmd.c
+++ b/drivers/baseband/acc/rte_acc100_pmd.c
@@ -3673,8 +3673,8 @@
 
desc_idx = acc_desc_idx_tail(q, *dequeued_descs);
desc = q->ring_addr + desc_idx;
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
 
/* Check fdone bit */
if (!(atom_desc.rsp.val & ACC_FDONE))
@@ -3728,8 +3728,8 @@
uint16_t current_dequeued_descs = 0, descs_in_tb;
 
desc = acc_desc_tail(q, *dequeued_descs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
 
/* Check fdone bit */
if (!(atom_desc.rsp.val & ACC_FDONE))
@@ -3742,8 +3742,8 @@
/* Check if last CB in TB is ready to dequeue (and thus
 * the whole TB) - checking sdone bit. If not return.
 */
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)last_desc,
+   rte_memory_order_relaxed);
if (!(atom_desc.rsp.val & ACC_SDONE))
return -1;
 
@@ -3755,8 +3755,8 @@
 
while (i < descs_in_tb) {
desc = acc_desc_tail(q, *dequeued_descs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
rsp.val = atom_desc.rsp.val;
rte_bbdev_log_debug("Resp. desc %p: %x descs %d cbs %d\n",
desc, rsp.val, descs_in_tb, desc->req.numCBs);
@@ -3793,8 +3793,8 @@
struct rte_bbdev_dec_op *op;
 
desc = acc_desc_tail(q, dequeued_cbs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
 
/* Check fdone bit */
if (!(atom_desc.rsp.val & ACC_FDONE))
@@ -3846,8 +3846,8 @@
struct rte_bbdev_dec_op *op;
 
desc = acc_desc_tail(q, dequeued_cbs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
 
/* Check fdone bit */
if (!(atom_desc.rsp.val & ACC_FDONE))
@@ -3902,8 +3902,8 @@
uint8_t cbs_in_tb = 1, cb_idx = 0;
 
desc = acc_desc_tail(q, dequeued_cbs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
 
/* Check fdone bit */
if (!(atom_desc.rsp.val & ACC_FDONE))
@@ -3919,8 +3919,8 @@
/* Check if last CB in TB is ready to dequeue (and thus
 * the whole TB) - checking sdone bit. If not return.
 */
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)last_desc,
+   rte_memory_order_relaxed);
if (!(atom_desc.rsp.val & ACC_SDONE))
return -1;
 
@@ -3930,8 +3930,8 @@
/* Read remaining CBs if exists */
while (cb_idx < cbs_in_tb) {
desc = acc_desc_tail(q, dequeued_cbs);
-   atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
-   __ATOMIC_RELAXED);
+   atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc,
+   rte_memory_order_relaxed);
rsp.val = atom_desc.rsp.val;
rte_bbdev_log_debug("Resp. desc %p: %x r %d c %d\n",
				desc, rsp.val, cb_idx, cbs_in_tb);
diff --git a/driv

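The acc100 hunks above illustrate the other half of the API: when the polled
storage (here a descriptor ring written by hardware) cannot be re-declared
with RTE_ATOMIC(), the pointer is cast with the __rte_atomic qualifier at the
call site. A minimal sketch, with a hypothetical completion bit:

	#include <stdint.h>
	#include <rte_stdatomic.h>

	#define DEMO_FDONE (1u << 0) /* hypothetical completion bit */

	/* Poll a descriptor word the device writes; returns 0 when done. */
	static int
	poll_desc(uint64_t *desc)
	{
		uint64_t hdr = rte_atomic_load_explicit(
				(uint64_t __rte_atomic *)desc,
				rte_memory_order_relaxed);

		return (hdr & DEMO_FDONE) ? 0 : -1;
	}
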
[PATCH v4 32/45] net/txgbe: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/txgbe/txgbe_ethdev.c| 12 +++-
 drivers/net/txgbe/txgbe_ethdev.h|  2 +-
 drivers/net/txgbe/txgbe_ethdev_vf.c |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index b75e889..a58f197 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -595,7 +595,7 @@ static int txgbe_dev_interrupt_action(struct rte_eth_dev *dev,
return 0;
}
 
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
rte_eth_copy_pci_info(eth_dev, pci_dev);
 
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
@@ -2834,7 +2834,7 @@ static int txgbe_dev_xstats_get_names_by_id(struct rte_eth_dev *dev,
struct txgbe_adapter *ad = TXGBE_DEV_ADAPTER(dev);
uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-   while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
+   while (rte_atomic_load_explicit(&ad->link_thread_running, rte_memory_order_seq_cst)) {
msec_delay(1);
timeout--;
 
@@ -2859,7 +2859,7 @@ static int txgbe_dev_xstats_get_names_by_id(struct rte_eth_dev *dev,
 
rte_thread_detach(rte_thread_self());
txgbe_dev_setup_link_alarm_handler(dev);
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
return 0;
 }
 
@@ -2908,7 +2908,8 @@ static int txgbe_dev_xstats_get_names_by_id(struct rte_eth_dev *dev,
} else if (hw->phy.media_type == txgbe_media_type_fiber &&
dev->data->dev_conf.intr_conf.lsc != 0) {
txgbe_dev_wait_setup_link_complete(dev, 0);
-   if (!__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
+   if (!rte_atomic_exchange_explicit(&ad->link_thread_running, 1,
+   rte_memory_order_seq_cst)) {
/* To avoid race condition between threads, set
 * the TXGBE_FLAG_NEED_LINK_CONFIG flag only
 * when there is no link thread running.
@@ -2918,7 +2919,8 @@ static int txgbe_dev_xstats_get_names_by_id(struct rte_eth_dev *dev,
"txgbe-link",

txgbe_dev_setup_link_thread_handler, dev) < 0) {
PMD_DRV_LOG(ERR, "Create link thread 
failed!");
-   
__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   
rte_atomic_store_explicit(&ad->link_thread_running, 0,
+   rte_memory_order_seq_cst);
}
} else {
PMD_DRV_LOG(ERR,
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index 7e8067c..e8f55f7 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -372,7 +372,7 @@ struct txgbe_adapter {
/* For RSS reta table update */
uint8_t rss_reta_updated;
 
-   uint32_t link_thread_running;
+   RTE_ATOMIC(uint32_t) link_thread_running;
rte_thread_t link_thread_tid;
 };
 
diff --git a/drivers/net/txgbe/txgbe_ethdev_vf.c b/drivers/net/txgbe/txgbe_ethdev_vf.c
index f1341fb..1abc190 100644
--- a/drivers/net/txgbe/txgbe_ethdev_vf.c
+++ b/drivers/net/txgbe/txgbe_ethdev_vf.c
@@ -206,7 +206,7 @@ static int txgbevf_dev_link_update(struct rte_eth_dev *dev,
return 0;
}
 
-   __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&ad->link_thread_running, 0, rte_memory_order_seq_cst);
rte_eth_copy_pci_info(eth_dev, pci_dev);
 
hw->device_id = pci_dev->id.device_id;
-- 
1.8.3.1


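Note that __atomic_test_and_set() and __atomic_clear() have no one-to-one rte
equivalents; the txgbe patch above models the flag as a plain 0/1 word and
uses exchange/store instead. A sketch with hypothetical names:

	#include <stdbool.h>
	#include <stdint.h>
	#include <rte_stdatomic.h>

	static RTE_ATOMIC(uint32_t) thread_running; /* hypothetical flag */

	/* Returns true if we won the race to start the thread. */
	static bool
	try_start(void)
	{
		/* A previous value of 0 means the flag was clear and is now ours. */
		return rte_atomic_exchange_explicit(&thread_running, 1,
				rte_memory_order_seq_cst) == 0;
	}

	static void
	stop(void)
	{
		rte_atomic_store_explicit(&thread_running, 0,
				rte_memory_order_seq_cst);
	}
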

[PATCH v4 33/45] net/null: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/net/null/rte_eth_null.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 7c46004..f4ed3b8 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
struct rte_mempool *mb_pool;
struct rte_mbuf *dummy_packet;
 
-   uint64_t rx_pkts;
-   uint64_t tx_pkts;
+   RTE_ATOMIC(uint64_t) rx_pkts;
+   RTE_ATOMIC(uint64_t) tx_pkts;
 };
 
 struct pmd_options {
@@ -102,7 +102,7 @@ struct pmd_internals {
}
 
/* NOTE: review for potential ordering optimization */
-   __atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
 
return i;
 }
@@ -130,7 +130,7 @@ struct pmd_internals {
}
 
/* NOTE: review for potential ordering optimization */
-   __atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&h->rx_pkts, i, rte_memory_order_seq_cst);
 
return i;
 }
@@ -155,7 +155,7 @@ struct pmd_internals {
rte_pktmbuf_free(bufs[i]);
 
/* NOTE: review for potential ordering optimization */
-   __atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
 
return i;
 }
@@ -178,7 +178,7 @@ struct pmd_internals {
}
 
/* NOTE: review for potential ordering optimization */
-   __atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&h->tx_pkts, i, rte_memory_order_seq_cst);
 
return i;
 }
-- 
1.8.3.1



[PATCH v4 34/45] event/dlb2: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/event/dlb2/dlb2.c| 34 +-
 drivers/event/dlb2/dlb2_priv.h   | 13 +
 drivers/event/dlb2/dlb2_xstats.c |  2 +-
 3 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index 628ddef..0b91f03 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -1005,7 +1005,7 @@ struct process_local_port_data
}
 
dlb2->new_event_limit = config->nb_events_limit;
-   __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
+   rte_atomic_store_explicit(&dlb2->inflights, 0, rte_memory_order_seq_cst);
 
/* Save number of ports/queues for this event dev */
dlb2->num_ports = config->nb_event_ports;
@@ -2668,10 +2668,10 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
batch_size = credits;
 
if (likely(credits &&
-  __atomic_compare_exchange_n(
+  rte_atomic_compare_exchange_strong_explicit(
qm_port->credit_pool[type],
-   &credits, credits - batch_size, false,
-   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
+   &credits, credits - batch_size,
+   rte_memory_order_seq_cst, rte_memory_order_seq_cst)))
return batch_size;
else
return 0;
@@ -2687,7 +2687,7 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
/* Replenish credits, saving one quanta for enqueues */
uint16_t val = ev_port->inflight_credits - quanta;
 
-   __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_sub_explicit(&dlb2->inflights, val, rte_memory_order_seq_cst);
ev_port->inflight_credits -= val;
}
 }
@@ -2696,8 +2696,8 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
  struct dlb2_eventdev_port *ev_port)
 {
-   uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
-   __ATOMIC_SEQ_CST);
+   uint32_t sw_inflights = rte_atomic_load_explicit(&dlb2->inflights,
+   rte_memory_order_seq_cst);
const int num = 1;
 
if (unlikely(ev_port->inflight_max < sw_inflights)) {
@@ -2719,8 +2719,8 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
return 1;
}
 
-   __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
-  __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&dlb2->inflights, credit_update_quanta,
+  rte_memory_order_seq_cst);
ev_port->inflight_credits += (credit_update_quanta);
 
if (ev_port->inflight_credits < num) {
@@ -3234,17 +3234,17 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
if (qm_port->dlb2->version == DLB2_HW_V2) {
qm_port->cached_ldb_credits += num;
if (qm_port->cached_ldb_credits >= 2 * batch_size) {
-   __atomic_fetch_add(
+   rte_atomic_fetch_add_explicit(
qm_port->credit_pool[DLB2_LDB_QUEUE],
-   batch_size, __ATOMIC_SEQ_CST);
+   batch_size, rte_memory_order_seq_cst);
qm_port->cached_ldb_credits -= batch_size;
}
} else {
qm_port->cached_credits += num;
if (qm_port->cached_credits >= 2 * batch_size) {
-   __atomic_fetch_add(
+   rte_atomic_fetch_add_explicit(
  qm_port->credit_pool[DLB2_COMBINED_POOL],
- batch_size, __ATOMIC_SEQ_CST);
+ batch_size, rte_memory_order_seq_cst);
qm_port->cached_credits -= batch_size;
}
}
@@ -3252,17 +3252,17 @@ static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
if (qm_port->dlb2->version == DLB2_HW_V2) {
qm_port->cached_dir_credits += num;
if (qm_port->cached_dir_credits >= 2 * batch_size) {
-   __atomic_fetch_add(
+   rte_atomic_fetch_add_explicit(
qm_port->

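The dlb2 credit-pool hunk above is a classic reserve-by-CAS: read the pool,
then atomically swap in the decremented value only if nobody raced ahead (on
failure the API writes the observed value back through the expected pointer).
A sketch with hypothetical names:

	#include <stdint.h>
	#include <rte_stdatomic.h>

	/* Try to take 'want' credits from a shared pool; returns credits taken. */
	static uint32_t
	take_credits(RTE_ATOMIC(uint32_t) *pool, uint32_t want)
	{
		uint32_t have = rte_atomic_load_explicit(pool,
				rte_memory_order_seq_cst);

		if (have < want)
			want = have;
		if (want != 0 &&
		    rte_atomic_compare_exchange_strong_explicit(pool, &have,
				have - want,
				rte_memory_order_seq_cst, rte_memory_order_seq_cst))
			return want;
		return 0; /* lost the race; the caller may retry */
	}
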
[PATCH v4 35/45] dma/idxd: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/dma/idxd/idxd_internal.h | 2 +-
 drivers/dma/idxd/idxd_pci.c  | 9 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index cd41777..537cf9b 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -33,7 +33,7 @@ struct idxd_pci_common {
rte_spinlock_t lk;
 
uint8_t wq_cfg_sz;
-   uint16_t ref_count;
+   RTE_ATOMIC(uint16_t) ref_count;
volatile struct rte_idxd_bar0 *regs;
volatile uint32_t *wq_regs_base;
volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index a78889a..06fa115 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -136,7 +136,8 @@
 * the PCI struct
 */
/* NOTE: review for potential ordering optimization */
-   is_last_wq = (__atomic_fetch_sub(&idxd->u.pci->ref_count, 1, __ATOMIC_SEQ_CST) == 1);
+   is_last_wq = (rte_atomic_fetch_sub_explicit(&idxd->u.pci->ref_count, 1,
+   rte_memory_order_seq_cst) == 1);
if (is_last_wq) {
/* disable the device */
err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -330,9 +331,9 @@
return ret;
}
qid = rte_dma_get_dev_id_by_name(qname);
-   max_qid = __atomic_load_n(
+   max_qid = rte_atomic_load_explicit(
		&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count,
-   __ATOMIC_SEQ_CST);
+   rte_memory_order_seq_cst);
 
/* we have queue 0 done, now configure the rest of the queues */
for (qid = 1; qid < max_qid; qid++) {
@@ -389,7 +390,7 @@
free(idxd.u.pci);
return ret;
}
-   __atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_add_explicit(&idxd.u.pci->ref_count, 1, rte_memory_order_seq_cst);
}
 
return 0;
-- 
1.8.3.1



[PATCH v4 38/45] bus/vmbus: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/bus/vmbus/rte_vmbus_reg.h | 2 +-
 drivers/bus/vmbus/vmbus_channel.c | 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/vmbus/rte_vmbus_reg.h b/drivers/bus/vmbus/rte_vmbus_reg.h
index a17ce40..e3299aa 100644
--- a/drivers/bus/vmbus/rte_vmbus_reg.h
+++ b/drivers/bus/vmbus/rte_vmbus_reg.h
@@ -28,7 +28,7 @@ struct vmbus_message {
  */
 
 struct vmbus_mon_trig {
-   uint32_tpending;
+   RTE_ATOMIC(uint32_t)pending;
uint32_tarmed;
 } __rte_packed;
 
diff --git a/drivers/bus/vmbus/vmbus_channel.c b/drivers/bus/vmbus/vmbus_channel.c
index 4d74df3..925c2aa 100644
--- a/drivers/bus/vmbus/vmbus_channel.c
+++ b/drivers/bus/vmbus/vmbus_channel.c
@@ -19,16 +19,16 @@
 #include "private.h"
 
 static inline void
-vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
+vmbus_sync_set_bit(volatile RTE_ATOMIC(uint32_t) *addr, uint32_t mask)
 {
-   /* Use GCC builtin which atomic does atomic OR operation */
-   __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_or_explicit(addr, mask, rte_memory_order_seq_cst);
 }
 
 static inline void
 vmbus_set_monitor(const struct vmbus_channel *channel, uint32_t monitor_id)
 {
-   uint32_t *monitor_addr, monitor_mask;
+   RTE_ATOMIC(uint32_t) *monitor_addr;
+   uint32_t monitor_mask;
unsigned int trigger_index;
 
trigger_index = monitor_id / HV_MON_TRIG_LEN;
-- 
1.8.3.1


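The vmbus change above keeps the volatile qualifier (the trigger word is
shared with the hypervisor) while adding the atomic type, so setting a bit
remains a single atomic OR. A sketch with hypothetical names:

	#include <stdint.h>
	#include <rte_stdatomic.h>

	/* Atomically set 'mask' bits in a word shared with another agent. */
	static inline void
	set_pending(volatile RTE_ATOMIC(uint32_t) *pending, uint32_t mask)
	{
		rte_atomic_fetch_or_explicit(pending, mask,
				rte_memory_order_seq_cst);
	}
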

[PATCH v4 36/45] crypto/ccp: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/crypto/ccp/ccp_dev.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ccp/ccp_dev.c b/drivers/crypto/ccp/ccp_dev.c
index b7ca3af..41c1422 100644
--- a/drivers/crypto/ccp/ccp_dev.c
+++ b/drivers/crypto/ccp/ccp_dev.c
@@ -116,15 +116,15 @@ struct ccp_queue *
 static inline void
 ccp_set_bit(unsigned long *bitmap, int n)
 {
-   __atomic_fetch_or(&bitmap[WORD_OFFSET(n)], (1UL << BIT_OFFSET(n)),
-   __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_or_explicit((unsigned long __rte_atomic *)&bitmap[WORD_OFFSET(n)],
+   (1UL << BIT_OFFSET(n)), rte_memory_order_seq_cst);
 }
 
 static inline void
 ccp_clear_bit(unsigned long *bitmap, int n)
 {
-   __atomic_fetch_and(&bitmap[WORD_OFFSET(n)], ~(1UL << BIT_OFFSET(n)),
-   __ATOMIC_SEQ_CST);
+   rte_atomic_fetch_and_explicit((unsigned long __rte_atomic *)&bitmap[WORD_OFFSET(n)],
+   ~(1UL << BIT_OFFSET(n)), rte_memory_order_seq_cst);
 }
 
 static inline uint32_t
-- 
1.8.3.1


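As in the acc100 patch, the ccp bitmap above keeps its legacy
unsigned long * type and is cast per call. A sketch of that casting pattern,
with hypothetical macros:

	#include <limits.h>
	#include <rte_stdatomic.h>

	#define BITS_PER_WORD (sizeof(unsigned long) * CHAR_BIT)

	static inline void
	bitmap_set(unsigned long *bitmap, unsigned int n)
	{
		/* Cast the target word for atomic access; the storage itself
		 * keeps its legacy non-atomic type. */
		rte_atomic_fetch_or_explicit(
			(unsigned long __rte_atomic *)&bitmap[n / BITS_PER_WORD],
			1UL << (n % BITS_PER_WORD), rte_memory_order_seq_cst);
	}
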

[PATCH v4 37/45] common/cpt: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 drivers/common/cpt/cpt_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cpt/cpt_common.h b/drivers/common/cpt/cpt_common.h
index 6596cc0..dee430f 100644
--- a/drivers/common/cpt/cpt_common.h
+++ b/drivers/common/cpt/cpt_common.h
@@ -73,7 +73,7 @@ struct __rte_aligned(8) cpt_request_info {
const unsigned int qsize)
 {
/* Ensure ordering between setting the entry and updating the tail */
-   rte_atomic_thread_fence(__ATOMIC_RELEASE);
+   rte_atomic_thread_fence(rte_memory_order_release);
 
q->tail = (q->tail + cnt) & (qsize - 1);
 }
-- 
1.8.3.1


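rte_atomic_thread_fence() in the cpt hunk above is a standalone fence, not an
operation on an object: it orders the preceding queue-entry writes before the
tail update that publishes them. A sketch of the producer side under that
assumption (hypothetical ring; a consumer would pair this with an acquire
fence or acquire load):

	#include <stdint.h>
	#include <rte_stdatomic.h>

	struct demo_ring { /* hypothetical single-producer ring */
		uint64_t entries[256];
		RTE_ATOMIC(uint32_t) tail;
	};

	static void
	ring_publish(struct demo_ring *q, uint64_t e)
	{
		uint32_t t = rte_atomic_load_explicit(&q->tail,
				rte_memory_order_relaxed);

		q->entries[t & 255] = e;
		/* Order the entry write before the tail update. */
		rte_atomic_thread_fence(rte_memory_order_release);
		rte_atomic_store_explicit(&q->tail, t + 1,
				rte_memory_order_relaxed);
	}
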

[PATCH v4 39/45] examples: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 examples/bbdev_app/main.c  | 13 +
 examples/l2fwd-event/l2fwd_common.h|  4 +--
 examples/l2fwd-event/l2fwd_event.c | 24 
 examples/l2fwd-jobstats/main.c | 11 
 .../client_server_mp/mp_server/main.c  |  6 ++--
 examples/server_node_efd/efd_server/main.c |  6 ++--
 examples/vhost/main.c  | 32 +++---
 examples/vhost/main.h  |  4 +--
 examples/vhost/virtio_net.c| 13 +
 examples/vhost_blk/vhost_blk.c |  8 +++---
 examples/vm_power_manager/channel_monitor.c|  9 +++---
 11 files changed, 68 insertions(+), 62 deletions(-)

diff --git a/examples/bbdev_app/main.c b/examples/bbdev_app/main.c
index d4c686c..7124b49 100644
--- a/examples/bbdev_app/main.c
+++ b/examples/bbdev_app/main.c
@@ -165,7 +165,7 @@ struct stats_lcore_params {
.num_dec_cores = 1,
 };
 
-static uint16_t global_exit_flag;
+static RTE_ATOMIC(uint16_t) global_exit_flag;
 
 /* display usage */
 static inline void
@@ -277,7 +277,7 @@ uint16_t bbdev_parse_number(const char *mask)
 signal_handler(int signum)
 {
printf("\nSignal %d received\n", signum);
-   __atomic_store_n(&global_exit_flag, 1, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&global_exit_flag, 1, rte_memory_order_relaxed);
 }
 
 static void
@@ -321,7 +321,8 @@ uint16_t bbdev_parse_number(const char *mask)
fflush(stdout);
 
for (count = 0; count <= MAX_CHECK_TIME &&
-   !__atomic_load_n(&global_exit_flag, __ATOMIC_RELAXED); count++) {
+   !rte_atomic_load_explicit(&global_exit_flag,
+   rte_memory_order_relaxed); count++) {
memset(&link, 0, sizeof(link));
link_get_err = rte_eth_link_get_nowait(port_id, &link);
 
@@ -675,7 +676,7 @@ uint16_t bbdev_parse_number(const char *mask)
 {
struct stats_lcore_params *stats_lcore = arg;
 
-   while (!__atomic_load_n(&global_exit_flag, __ATOMIC_RELAXED)) {
+   while (!rte_atomic_load_explicit(&global_exit_flag, rte_memory_order_relaxed)) {
print_stats(stats_lcore);
rte_delay_ms(500);
}
@@ -921,7 +922,7 @@ uint16_t bbdev_parse_number(const char *mask)
const bool run_decoder = (lcore_conf->core_type &
(1 << RTE_BBDEV_OP_TURBO_DEC));
 
-   while (!__atomic_load_n(&global_exit_flag, __ATOMIC_RELAXED)) {
+   while (!rte_atomic_load_explicit(&global_exit_flag, rte_memory_order_relaxed)) {
if (run_encoder)
run_encoding(lcore_conf);
if (run_decoder)
@@ -1055,7 +1056,7 @@ uint16_t bbdev_parse_number(const char *mask)
.align = alignof(struct rte_mbuf *),
};
 
-   __atomic_store_n(&global_exit_flag, 0, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&global_exit_flag, 0, rte_memory_order_relaxed);
 
sigret = signal(SIGTERM, signal_handler);
if (sigret == SIG_ERR)
diff --git a/examples/l2fwd-event/l2fwd_common.h b/examples/l2fwd-event/l2fwd_common.h
index c56b3e7..8cf91b9 100644
--- a/examples/l2fwd-event/l2fwd_common.h
+++ b/examples/l2fwd-event/l2fwd_common.h
@@ -61,8 +61,8 @@
 /* Per-port statistics struct */
 struct __rte_cache_aligned l2fwd_port_statistics {
uint64_t dropped;
-   uint64_t tx;
-   uint64_t rx;
+   RTE_ATOMIC(uint64_t) tx;
+   RTE_ATOMIC(uint64_t) rx;
 };
 
 /* Event vector attributes */
diff --git a/examples/l2fwd-event/l2fwd_event.c b/examples/l2fwd-event/l2fwd_event.c
index 4b5a032..2247202 100644
--- a/examples/l2fwd-event/l2fwd_event.c
+++ b/examples/l2fwd-event/l2fwd_event.c
@@ -163,8 +163,8 @@
dst_port = rsrc->dst_ports[mbuf->port];
 
if (timer_period > 0)
-   __atomic_fetch_add(&rsrc->port_stats[mbuf->port].rx,
-   1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rsrc->port_stats[mbuf->port].rx,
+   1, rte_memory_order_relaxed);
mbuf->port = dst_port;
 
if (flags & L2FWD_EVENT_UPDT_MAC)
@@ -179,8 +179,8 @@
rte_event_eth_tx_adapter_txq_set(mbuf, 0);
 
if (timer_period > 0)
-   __atomic_fetch_add(&rsrc->port_stats[mbuf->port].tx,
-   1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit(&rsrc->port_stats[mbuf->port].tx,
+   1, rte_memory_order_relaxed);
 }
 
 static __rte_always_inline void
@@ -367,8 +367,8 @@
vec->queue = 0;
 
if (timer_period > 0)
-   __atomi

[PATCH v4 40/45] app/dumpcap: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/dumpcap/main.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c
index cc0f66b..b25b95e 100644
--- a/app/dumpcap/main.c
+++ b/app/dumpcap/main.c
@@ -51,7 +51,7 @@
 
 /* command line flags */
 static const char *progname;
-static bool quit_signal;
+static RTE_ATOMIC(bool) quit_signal;
 static bool group_read;
 static bool quiet;
 static bool use_pcapng = true;
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
 static void
 signal_handler(int sig_num __rte_unused)
 {
-   __atomic_store_n(&quit_signal, true, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&quit_signal, true, rte_memory_order_relaxed);
 }
 
 
@@ -490,7 +490,7 @@ static void statistics_loop(void)
printf("%-15s  %10s  %10s\n",
   "Interface", "Received", "Dropped");
 
-   while (!__atomic_load_n(&quit_signal, __ATOMIC_RELAXED)) {
+   while (!rte_atomic_load_explicit(&quit_signal, rte_memory_order_relaxed)) {
RTE_ETH_FOREACH_DEV(p) {
if (rte_eth_dev_get_name_by_port(p, name) < 0)
continue;
@@ -528,7 +528,7 @@ static void statistics_loop(void)
 static void
 monitor_primary(void *arg __rte_unused)
 {
-   if (__atomic_load_n(&quit_signal, __ATOMIC_RELAXED))
+   if (rte_atomic_load_explicit(&quit_signal, rte_memory_order_relaxed))
return;
 
if (rte_eal_primary_proc_alive(NULL)) {
@@ -536,7 +536,7 @@ static void statistics_loop(void)
} else {
fprintf(stderr,
"Primary process is no longer active, exiting...\n");
-   __atomic_store_n(&quit_signal, true, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&quit_signal, true, rte_memory_order_relaxed);
}
 }
 
@@ -983,7 +983,7 @@ int main(int argc, char **argv)
show_count(0);
}
 
-   while (!__atomic_load_n(&quit_signal, __ATOMIC_RELAXED)) {
+   while (!rte_atomic_load_explicit(&quit_signal, rte_memory_order_relaxed)) {
if (process_ring(out, r) < 0) {
fprintf(stderr, "pcapng file write failed; %s\n",
strerror(errno));
-- 
1.8.3.1



[PATCH v4 42/45] app/test-eventdev: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/test-eventdev/test_order_atq.c| 4 ++--
 app/test-eventdev/test_order_common.c | 5 +++--
 app/test-eventdev/test_order_common.h | 8 
 app/test-eventdev/test_order_queue.c  | 4 ++--
 app/test-eventdev/test_perf_common.h  | 6 +++---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/app/test-eventdev/test_order_atq.c b/app/test-eventdev/test_order_atq.c
index 2fee4b4..128d3f2 100644
--- a/app/test-eventdev/test_order_atq.c
+++ b/app/test-eventdev/test_order_atq.c
@@ -28,7 +28,7 @@
uint16_t event = rte_event_dequeue_burst(dev_id, port,
&ev, 1, 0);
if (!event) {
-   if (__atomic_load_n(outstand_pkts, __ATOMIC_RELAXED) <= 0)
+   if (rte_atomic_load_explicit(outstand_pkts, rte_memory_order_relaxed) <= 0)
break;
rte_pause();
continue;
@@ -64,7 +64,7 @@
BURST_SIZE, 0);
 
if (nb_rx == 0) {
-   if (__atomic_load_n(outstand_pkts, __ATOMIC_RELAXED) <= 0)
+   if (rte_atomic_load_explicit(outstand_pkts, rte_memory_order_relaxed) <= 0)
break;
rte_pause();
continue;
diff --git a/app/test-eventdev/test_order_common.c b/app/test-eventdev/test_order_common.c
index a9894c6..0fceace 100644
--- a/app/test-eventdev/test_order_common.c
+++ b/app/test-eventdev/test_order_common.c
@@ -189,7 +189,7 @@
evt_err("failed to allocate t->expected_flow_seq memory");
goto exp_nomem;
}
-   __atomic_store_n(&t->outstand_pkts, opt->nb_pkts, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&t->outstand_pkts, opt->nb_pkts, rte_memory_order_relaxed);
t->err = false;
t->nb_pkts = opt->nb_pkts;
t->nb_flows = opt->nb_flows;
@@ -296,7 +296,8 @@
 
while (t->err == false) {
uint64_t new_cycles = rte_get_timer_cycles();
-   int64_t remaining = __atomic_load_n(&t->outstand_pkts, __ATOMIC_RELAXED);
+   int64_t remaining = rte_atomic_load_explicit(&t->outstand_pkts,
+   rte_memory_order_relaxed);
 
if (remaining <= 0) {
t->result = EVT_TEST_SUCCESS;
diff --git a/app/test-eventdev/test_order_common.h b/app/test-eventdev/test_order_common.h
index d4cbc5c..7177fd8 100644
--- a/app/test-eventdev/test_order_common.h
+++ b/app/test-eventdev/test_order_common.h
@@ -48,7 +48,7 @@ struct __rte_cache_aligned test_order {
 * The atomic_* is an expensive operation,Since it is a functional test,
 * We are using the atomic_ operation to reduce the code complexity.
 */
-   uint64_t outstand_pkts;
+   RTE_ATOMIC(uint64_t) outstand_pkts;
enum evt_test_result result;
uint32_t nb_flows;
uint64_t nb_pkts;
@@ -95,7 +95,7 @@ struct __rte_cache_aligned test_order {
 order_process_stage_1(struct test_order *const t,
struct rte_event *const ev, const uint32_t nb_flows,
uint32_t *const expected_flow_seq,
-   uint64_t *const outstand_pkts)
+   RTE_ATOMIC(uint64_t) *const outstand_pkts)
 {
const uint32_t flow = (uintptr_t)ev->mbuf % nb_flows;
/* compare the seqn against expected value */
@@ -113,7 +113,7 @@ struct __rte_cache_aligned test_order {
 */
expected_flow_seq[flow]++;
rte_pktmbuf_free(ev->mbuf);
-   __atomic_fetch_sub(outstand_pkts, 1, __ATOMIC_RELAXED);
+   rte_atomic_fetch_sub_explicit(outstand_pkts, 1, rte_memory_order_relaxed);
 }
 
 static __rte_always_inline void
@@ -132,7 +132,7 @@ struct __rte_cache_aligned test_order {
const uint8_t port = w->port_id;\
const uint32_t nb_flows = t->nb_flows;\
uint32_t *expected_flow_seq = t->expected_flow_seq;\
-   uint64_t *outstand_pkts = &t->outstand_pkts;\
+   RTE_ATOMIC(uint64_t) *outstand_pkts = &t->outstand_pkts;\
if (opt->verbose_level > 1)\
printf("%s(): lcore %d dev_id %d port=%d\n",\
__func__, rte_lcore_id(), dev_id, port)
diff --git a/app/test-eventdev/test_order_queue.c b/app/test-eventdev/test_order_queue.c
index 80eaea5..a282ab2 100644
--- a/app/test-eventdev/test_order_queue.c
+++ b/app/test-eventdev/test_order_queue.c
@@ -28,7 +28,7 @@
uint16_t event = rte_event_dequeue_burst(dev_id, port,
&ev, 1, 0);
if (!event) {
-   if (__atomic_load_n(outstand_pkts, __ATOMIC_RELAXED) <= 0)
+   if (rte_atomic_load_explicit(outstand_pkts, 

[PATCH v4 41/45] app/test: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/test/test_bpf.c|  46 -
 app/test/test_distributor.c| 114 -
 app/test/test_distributor_perf.c   |   4 +-
 app/test/test_func_reentrancy.c|  28 
 app/test/test_hash_multiwriter.c   |  16 ++---
 app/test/test_hash_readwrite.c |  74 ++---
 app/test/test_hash_readwrite_lf_perf.c |  88 -
 app/test/test_lcores.c |  25 
 app/test/test_lpm_perf.c   |  14 ++--
 app/test/test_mcslock.c|  12 ++--
 app/test/test_mempool_perf.c   |   9 +--
 app/test/test_pflock.c |  13 ++--
 app/test/test_pmd_perf.c   |  10 +--
 app/test/test_rcu_qsbr_perf.c  | 114 +
 app/test/test_ring_perf.c  |  11 ++--
 app/test/test_ring_stress_impl.h   |  10 +--
 app/test/test_rwlock.c |   9 +--
 app/test/test_seqlock.c|   6 +-
 app/test/test_service_cores.c  |  24 +++
 app/test/test_spinlock.c   |   9 +--
 app/test/test_stack_perf.c |  12 ++--
 app/test/test_threads.c|  33 +-
 app/test/test_ticketlock.c |   9 +--
 app/test/test_timer.c  |  31 +
 24 files changed, 378 insertions(+), 343 deletions(-)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 53e3a31..2e43442 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -39,8 +39,8 @@
  */
 
 struct dummy_offset {
-   uint64_t u64;
-   uint32_t u32;
+   RTE_ATOMIC(uint64_t) u64;
+   RTE_ATOMIC(uint32_t) u32;
uint16_t u16;
uint8_t  u8;
 };
@@ -1581,32 +1581,46 @@ struct bpf_test {
memset(&dfe, 0, sizeof(dfe));
 
rv = 1;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = -1;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = (int32_t)TEST_FILL_1;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = TEST_MUL_1;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = TEST_MUL_2;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = TEST_JCC_2;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
rv = TEST_JCC_3;
-   __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-   __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+   rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+   rte_memory_order_relaxed);
+   rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+   rte_memory_order_relaxed);
 
return cmp_res(__func__, 1, rc, &dfe, dft, sizeof(dfe));
 }
diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
index 13357b9..60fe96e 100644
--- a/app/test/test_distributor.c
+++ b/app/test/test_distributor.c
@@ -47,14 +47,14 @@ struct worker_params {
 struct worker_params worker_params;
 
 /* statics - all zero-initi

[PATCH v4 43/45] app/test-crypto-perf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/test-crypto-perf/cperf_test_latency.c|  6 +++---
 app/test-crypto-perf/cperf_test_pmd_cyclecount.c | 10 +-
 app/test-crypto-perf/cperf_test_throughput.c | 10 +-
 app/test-crypto-perf/cperf_test_verify.c | 10 +-
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/app/test-crypto-perf/cperf_test_latency.c b/app/test-crypto-perf/cperf_test_latency.c
index 99b7d7c..b8ad6bf 100644
--- a/app/test-crypto-perf/cperf_test_latency.c
+++ b/app/test-crypto-perf/cperf_test_latency.c
@@ -136,7 +136,7 @@ struct priv_op_data {
uint32_t imix_idx = 0;
int ret = 0;
 
-   static uint16_t display_once;
+   static RTE_ATOMIC(uint16_t) display_once;
 
if (ctx == NULL)
return 0;
@@ -341,8 +341,8 @@ struct priv_op_data {
 
uint16_t exp = 0;
if (ctx->options->csv) {
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed))
			printf("\n# lcore, Buffer Size, Burst Size, Pakt Seq #, "
"cycles, time (us)");
 
diff --git a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
index 4a60f6d..7191d99 100644
--- a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
+++ b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
@@ -396,7 +396,7 @@ struct pmd_cyclecount_state {
state.lcore = rte_lcore_id();
state.linearize = 0;
 
-   static uint16_t display_once;
+   static RTE_ATOMIC(uint16_t) display_once;
static bool warmup = true;
 
/*
@@ -443,8 +443,8 @@ struct pmd_cyclecount_state {
 
uint16_t exp = 0;
if (!opts->csv) {
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed))
printf(PRETTY_HDR_FMT, "lcore id", "Buf Size",
"Burst Size", "Enqueued",
"Dequeued", "Enq Retries",
@@ -460,8 +460,8 @@ struct pmd_cyclecount_state {
state.cycles_per_enq,
state.cycles_per_deq);
} else {
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed))
printf(CSV_HDR_FMT, "# lcore id", "Buf Size",
"Burst Size", "Enqueued",
"Dequeued", "Enq Retries",
diff --git a/app/test-crypto-perf/cperf_test_throughput.c b/app/test-crypto-perf/cperf_test_throughput.c
index e3d266d..c0891e7 100644
--- a/app/test-crypto-perf/cperf_test_throughput.c
+++ b/app/test-crypto-perf/cperf_test_throughput.c
@@ -107,7 +107,7 @@ struct cperf_throughput_ctx {
uint8_t burst_size_idx = 0;
uint32_t imix_idx = 0;
 
-   static uint16_t display_once;
+   static RTE_ATOMIC(uint16_t) display_once;
 
struct rte_crypto_op *ops[ctx->options->max_burst_size];
struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
@@ -277,8 +277,8 @@ struct cperf_throughput_ctx {
 
uint16_t exp = 0;
if (!ctx->options->csv) {
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed))

printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
"lcore id", "Buf Size", "Burst Size",
"Enqueued", "Dequeued", "Failed Enq",
@@ -298,8 +298,8 @@ struct cperf_throughput_ctx {
throughput_gbps,
 

[PATCH v4 45/45] app/test-bbdev: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/test-bbdev/test_bbdev_perf.c | 183 +++
 1 file changed, 110 insertions(+), 73 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index dcce00a..9694ed3 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -144,7 +144,7 @@ struct test_op_params {
uint16_t num_to_process;
uint16_t num_lcores;
int vector_mask;
-   uint16_t sync;
+   RTE_ATOMIC(uint16_t) sync;
struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
 };
 
@@ -159,9 +159,9 @@ struct thread_params {
uint8_t iter_count;
double iter_average;
double bler;
-   uint16_t nb_dequeued;
-   int16_t processing_status;
-   uint16_t burst_sz;
+   RTE_ATOMIC(uint16_t) nb_dequeued;
+   RTE_ATOMIC(int16_t) processing_status;
+   RTE_ATOMIC(uint16_t) burst_sz;
struct test_op_params *op_params;
struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
@@ -3195,56 +3195,64 @@ typedef int (test_case_function)(struct active_device *ad,
}
 
if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
-   __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
+   rte_atomic_store_explicit(&tp->processing_status, TEST_FAILED,
+   rte_memory_order_relaxed);
printf(
"Dequeue interrupt handler called for incorrect 
event!\n");
return;
}
 
-   burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
+   burst_sz = rte_atomic_load_explicit(&tp->burst_sz, rte_memory_order_relaxed);
num_ops = tp->op_params->num_to_process;
 
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
			&tp->dec_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
			&tp->dec_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
			&tp->enc_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
			&tp->fft_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
else if (test_vector.op_type == RTE_BBDEV_OP_MLDTS)
deq = rte_bbdev_dequeue_mldts_ops(dev_id, queue_id,
			&tp->mldts_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
else /*RTE_BBDEV_OP_TURBO_ENC*/
deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
			&tp->enc_ops[
-   __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+   rte_atomic_load_explicit(&tp->nb_dequeued,
+   rte_memory_order_relaxed)],
			burst_sz);
 
if (deq < burst_sz) {
printf(
"After receiving the interrupt all operations should be 
dequeued. Expected: %u, got: %u\n",
burst_sz, deq);
-  

[PATCH v4 44/45] app/test-compress-perf: use rte stdatomic API

2024-04-19 Thread Tyler Retzlaff
Replace the use of gcc builtin __atomic_xxx intrinsics with
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff 
Acked-by: Stephen Hemminger 
---
 app/test-compress-perf/comp_perf_test_common.h |  2 +-
 app/test-compress-perf/comp_perf_test_cyclecount.c |  4 ++--
 app/test-compress-perf/comp_perf_test_throughput.c | 10 +-
 app/test-compress-perf/comp_perf_test_verify.c |  6 +++---
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
index d039e5a..085e269 100644
--- a/app/test-compress-perf/comp_perf_test_common.h
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -14,7 +14,7 @@ struct cperf_mem_resources {
uint16_t qp_id;
uint8_t lcore_id;
 
-   uint16_t print_info_once;
+   RTE_ATOMIC(uint16_t) print_info_once;
 
uint32_t total_bufs;
uint8_t *compressed_data;
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
index 4d336ec..64e8faa 100644
--- a/app/test-compress-perf/comp_perf_test_cyclecount.c
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
@@ -498,8 +498,8 @@ struct cperf_cyclecount_ctx {
/*
 * printing information about current compression thread
 */
-   if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
-   1, 0, __ATOMIC_RELAXED,  __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&ctx->ver.mem.print_info_once, &exp,
+   1, rte_memory_order_relaxed,  rte_memory_order_relaxed))
printf("lcore: %u,"
" driver name: %s,"
" device name: %s,"
diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-compress-perf/comp_perf_test_throughput.c
index 1f7072d..089d19c 100644
--- a/app/test-compress-perf/comp_perf_test_throughput.c
+++ b/app/test-compress-perf/comp_perf_test_throughput.c
@@ -336,7 +336,7 @@
struct cperf_benchmark_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->ver.options;
uint32_t lcore = rte_lcore_id();
-   static uint16_t display_once;
+   static RTE_ATOMIC(uint16_t) display_once;
int i, ret = EXIT_SUCCESS;
 
ctx->ver.mem.lcore_id = lcore;
@@ -345,8 +345,8 @@
/*
 * printing information about current compression thread
 */
-   if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
-   1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+   if (rte_atomic_compare_exchange_strong_explicit(&ctx->ver.mem.print_info_once, &exp,
+   1, rte_memory_order_relaxed, rte_memory_order_relaxed))
printf("lcore: %u,"
" driver name: %s,"
" device name: %s,"
@@ -413,8 +413,8 @@
}
 
exp = 0;
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed)) {
printf("\n%12s%6s%12s%17s%15s%16s\n",
"lcore id", "Level", "Comp size", "Comp ratio [%]",
"Comp [Gbps]", "Decomp [Gbps]");
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
index 7bd1807..09d97c5 100644
--- a/app/test-compress-perf/comp_perf_test_verify.c
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -396,7 +396,7 @@
struct cperf_verify_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->options;
int ret = EXIT_SUCCESS;
-   static uint16_t display_once;
+   static RTE_ATOMIC(uint16_t) display_once;
uint32_t lcore = rte_lcore_id();
uint16_t exp = 0;
 
@@ -452,8 +452,8 @@
test_data->input_data_sz * 100;
 
if (!ctx->silent) {
-   if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
-   __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
+   if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
+   rte_memory_order_relaxed, rte_memory_order_relaxed)) {
printf("%12s%6s%12s%17s\n",
"lcore id", "Level", "Comp size", "Comp ratio [%]");
}
-- 
1.8.3.1



[RFC] fix semicolon at the end of RTE_LOG_REGISTER_DEFAULT

2024-04-19 Thread Stephen Hemminger
The macro RTE_LOG_REGISTER_DEFAULT emits code for an initialization
function. Most drivers add a semicolon after the macro:

RTE_LOG_REGISTER_DEFAULT(logtype_foo, INFO);

which is equivalent to:

int logtype_foo;
static void __logtype_foo(void) {
	logtype_foo = rte_log_register_type_and_pick_level("logtype_foo",
							   RTE_LOG_INFO);
	if (logtype_foo < 0)
		logtype_foo = RTE_LOGTYPE_EAL;
};
The problem is the extra semicolon after the function body.

If the code is built with the highest level of warnings (pedantic),
it will generate a warning:
  warning: ISO C does not allow extra ‘;’ outside of a function

This is a treewide fix for this.

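One well-known way to make a function-definition macro tolerate (in fact
require) the trailing semicolon is to end its expansion with a declaration.
This is sketched below with hypothetical names, for illustration only; it is
not necessarily the approach this patch takes:

/* Sketch only: ending the expansion in a declaration lets the caller's
 * ';' terminate something legal instead of dangling at file scope. */
#define DEMO_REGISTER(token)                                      \
	static void __attribute__((constructor)) __##token(void) \
	{ /* registration body */ }                               \
	extern int token ## _semicolon_guard

DEMO_REGISTER(logtype_demo);	/* ';' terminates the extern declaration */
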
Signed-off-by: Stephen Hemminger 
---
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |  4 +--
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |  4 +--
 drivers/baseband/la12xx/bbdev_la12xx.c|  2 +-
 drivers/baseband/null/bbdev_null.c|  2 +-
 .../baseband/turbo_sw/bbdev_turbo_software.c  |  2 +-
 drivers/bus/auxiliary/auxiliary_common.c  |  2 +-
 drivers/bus/cdx/cdx.c |  2 +-
 drivers/bus/dpaa/dpaa_bus.c   |  2 +-
 drivers/bus/fslmc/fslmc_bus.c |  2 +-
 drivers/bus/ifpga/ifpga_bus.c |  2 +-
 drivers/bus/platform/platform.c   |  2 +-
 drivers/bus/uacce/uacce.c |  2 +-
 drivers/bus/vdev/vdev.c   |  2 +-
 drivers/bus/vmbus/vmbus_common.c  |  2 +-
 drivers/common/cnxk/roc_platform.c| 28 +--
 drivers/common/dpaax/dpaax_iova_table.c   |  2 +-
 drivers/common/iavf/iavf_impl.c   |  2 +-
 drivers/common/idpf/base/idpf_common.c|  2 +-
 drivers/common/nitrox/nitrox_logs.c   |  2 +-
 drivers/common/sfc_efx/sfc_efx.c  |  2 +-
 drivers/compress/isal/isal_compress_pmd.c |  2 +-
 drivers/compress/octeontx/otx_zip_pmd.c   |  2 +-
 drivers/compress/zlib/zlib_pmd.c  |  2 +-
 drivers/crypto/armv8/rte_armv8_pmd.c  |  2 +-
 drivers/crypto/mvsam/rte_mrvl_pmd.c   |  2 +-
 drivers/crypto/null/null_crypto_pmd.c |  2 +-
 drivers/crypto/octeontx/otx_cryptodev.c   |  2 +-
 drivers/crypto/openssl/rte_openssl_pmd.c  |  2 +-
 .../scheduler/rte_cryptodev_scheduler.c   |  2 +-
 drivers/crypto/uadk/uadk_crypto_pmd.c |  2 +-
 drivers/dma/dpaa/dpaa_qdma.c  |  2 +-
 drivers/dma/dpaa2/dpaa2_qdma.c|  2 +-
 drivers/dma/hisilicon/hisi_dmadev.c   |  2 +-
 drivers/dma/idxd/idxd_common.c|  2 +-
 drivers/dma/ioat/ioat_dmadev.c|  2 +-
 drivers/dma/skeleton/skeleton_dmadev.c|  2 +-
 drivers/event/dlb2/dlb2.c |  2 +-
 drivers/event/dpaa/dpaa_eventdev.c|  2 +-
 drivers/event/dpaa2/dpaa2_eventdev.c  |  2 +-
 drivers/event/octeontx/ssovf_evdev.c  |  2 +-
 drivers/event/skeleton/skeleton_eventdev.c|  2 +-
 drivers/event/sw/sw_evdev.c   |  2 +-
 drivers/gpu/cuda/cuda.c   |  2 +-
 drivers/mempool/dpaa/dpaa_mempool.c   |  2 +-
 drivers/mempool/dpaa2/dpaa2_hw_mempool.c  |  2 +-
 drivers/net/af_packet/rte_eth_af_packet.c |  2 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c   |  2 +-
 drivers/net/ark/ark_ethdev.c  |  2 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |  2 +-
 drivers/net/dpaa/dpaa_ethdev.c|  2 +-
 drivers/net/dpaa2/dpaa2_ethdev.c  |  2 +-
 drivers/net/enetc/enetc_ethdev.c  |  2 +-
 drivers/net/enetfec/enet_ethdev.c |  2 +-
 drivers/net/enic/enic_ethdev.c|  2 +-
 drivers/net/hinic/hinic_pmd_ethdev.c  |  2 +-
 drivers/net/ionic/ionic_ethdev.c  |  2 +-
 drivers/net/ipn3ke/ipn3ke_ethdev.c|  2 +-
 drivers/net/memif/rte_eth_memif.c |  2 +-
 drivers/net/mvneta/mvneta_ethdev.c|  2 +-
 drivers/net/mvpp2/mrvl_ethdev.c   |  2 +-
 drivers/net/nfb/nfb_ethdev.c  |  2 +-
 drivers/net/null/rte_eth_null.c   |  2 +-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  2 +-
 drivers/net/pcap/pcap_ethdev.c|  2 +-
 drivers/net/pfe/pfe_ethdev.c  |  2 +-
 drivers/net/ring/rte_eth_ring.c   |  2 +-
 drivers/net/softnic/rte_eth_softnic.c |  2 +-
 drivers/net/tap/rte_eth_tap.c |  2 +-
 drivers/net/vdev_netvsc/vdev_netvsc.c |  2 +-
 drivers/net/vhost/rte_eth_vhost.c |  2 +-
 drivers/raw/ifpga/ifpga_rawdev.c  |  2 +-
 drivers/raw/ntb/ntb.c |  2 +-
 drivers/raw/skeleton/skeleton_rawdev.c|  2 +-
 lib/acl/rte_acl.c |  2 +-
 lib/argparse/rte_argparse.c   |  2 +-
 lib/bbdev/rte_bbdev.c |  2 +-
 lib/bpf/bpf.c  

RE: [PATCH v2 0/2] Wangxun support vector Rx/Tx

2024-04-19 Thread Jiawen Wu
On Friday, April 19, 2024 8:12 AM, ferruh.yi...@amd.com wrote:
> On 4/9/2024 7:31 AM, Jiawen Wu wrote:
> > Add SSE/NEON vector instructions for TXGBE and NGBE driver to process
> > packets.
> >
> > v2:
> > - Add performance test results.
> > - Cleanup codes and rebase.
> > - Remove GCC "-Wcast-qual".
> >
> > Jiawen Wu (2):
> >   net/txgbe: add vectorized functions for Rx/Tx
> >   net/ngbe: add vectorized functions for Rx/Tx
> >
> 
> Do you want to update release notes to announce new vector datapath support?

Yes, it should be added, I forgot it.



Re: [PATCH v2] app/testpmd: fix lcore ID restriction

2024-04-19 Thread Ferruh Yigit
On 4/16/2024 10:55 AM, Sivaprasad Tummala wrote:
> With modern CPUs, it is possible to have a higher
> CPU count, and thus a higher RTE_MAX_LCORES.
> In the testpmd application, the current config forwarding
> cores option "--nb-cores" is hard-limited to 255.
> 
> The patch fixes this constraint and also adjusts the lcore
> data structure to 32-bit to align with the rte lcore APIs.
> 
> Fixes: af75078fece3 ("first public release")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Sivaprasad Tummala 
>

Recheck-request: iol-unit-amd64-testing



Re: [PATCH v2] net/vmxnet3: fix a missing vmxnet3 register command

2024-04-19 Thread Ferruh Yigit
On 4/18/2024 7:36 AM, Ronak Doshi wrote:
> Vmxnet3 uses capability registers to advertise the supported
> capabilities of the UPT device. It uses the DCR0_REG command register
> for this purpose. However, the register command enum is missing
> one command, which technically is not used by the dpdk vmxnet3
> driver yet. This can cause issues for commands added later.
> 
> This patch fixes this issue by adding a reserved
> command in its place.
> 
> Fixes: 5241d61bd865 ("net/vmxnet3: support capability registers")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Ronak Doshi 
> --
> Change log
> v2: fixed typo
> ---
>  drivers/net/vmxnet3/base/vmxnet3_defs.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/vmxnet3/base/vmxnet3_defs.h 
> b/drivers/net/vmxnet3/base/vmxnet3_defs.h
> index 24c235876e..a6bb281d8d 100644
> --- a/drivers/net/vmxnet3/base/vmxnet3_defs.h
> +++ b/drivers/net/vmxnet3/base/vmxnet3_defs.h
> @@ -126,6 +126,7 @@ typedef enum {
> VMXNET3_CMD_RESERVED7,
> VMXNET3_CMD_RESERVED8,
> VMXNET3_CMD_GET_MAX_QUEUES_CONF,
> +   VMXNET3_CMD_RESERVED11,
> VMXNET3_CMD_GET_MAX_CAPABILITIES,
> VMXNET3_CMD_GET_DCR0_REG,
>  } Vmxnet3_Cmd;

Hi Ronak,

Thank you for the patch.

What is fixed from the user's perspective, and what is the impact?

I can see two commands affected by the missing enum value: GET_DCR0_REG
& GET_MAX_CAPABILITIES.
Was the user unable to read capabilities, or reading wrong capabilities, from
the device up until now?
If so, what were these capabilities and how was the user impacted by missing them?
Can you please detail the impact more in the commit log?




[PATCH v1] dts: rename execution to test run

2024-04-19 Thread Juraj Linkeš
The configuration containing the combination of:
1. what testbed to use,
2. which tests to run,
3. and what build targets to test

is called an execution. This is confusing, since we use the exact
same term to describe other things, and "execution" does not capture
the three items listed above very well.

A new term is thus needed to describe the configuration. Test run is
much less confusing and better captures what the configuration contains.

Signed-off-by: Juraj Linkeš 
---
 doc/guides/tools/dts.rst   |  10 +-
 dts/conf.yaml  |   8 +-
 dts/framework/config/__init__.py   |  46 +++---
 dts/framework/config/conf_yaml_schema.json |   6 +-
 dts/framework/config/types.py  |   8 +-
 dts/framework/logger.py|  16 +-
 dts/framework/runner.py| 170 +++--
 dts/framework/test_result.py   |  70 -
 dts/framework/testbed_model/node.py|  38 ++---
 dts/tests/TestSuite_pmd_buffer_scatter.py  |   2 +-
 10 files changed, 193 insertions(+), 181 deletions(-)

diff --git a/doc/guides/tools/dts.rst b/doc/guides/tools/dts.rst
index d1c3c2af7a..c34f750c96 100644
--- a/doc/guides/tools/dts.rst
+++ b/doc/guides/tools/dts.rst
@@ -198,7 +198,7 @@ and then run the tests with the newly built binaries.
 Configuring DTS
 ~~~
 
-DTS configuration is split into nodes and executions and build targets within executions,
+DTS configuration is split into nodes and test runs and build targets within test runs,
 and follows a defined schema as described in `Configuration Schema`_.
 By default, DTS will try to use the ``dts/conf.yaml`` :ref:`config file `,
 which is a template that illustrates what can be configured in DTS.
@@ -537,12 +537,12 @@ _`Test target`
 Properties
 ~~
 
-The configuration requires listing all the execution environments and nodes
+The configuration requires listing all the test run environments and nodes
 involved in the testing. These can be defined with the following mappings:
 
-``executions``
+``test runs``
   `sequence `_ listing
-   the execution environments. Each entry is described as per the following
+   the test run environments. Each entry is described as per the following
   `mapping `_:
 

++---+
@@ -637,4 +637,4 @@ And they both have two network ports which are physically connected to each other
 
 .. literalinclude:: ../../../dts/conf.yaml
:language: yaml
-   :start-at: executions:
+   :start-at: test_runs:
diff --git a/dts/conf.yaml b/dts/conf.yaml
index 8068345dd5..7d9173fe7c 100644
--- a/dts/conf.yaml
+++ b/dts/conf.yaml
@@ -2,8 +2,8 @@
 # Copyright 2022-2023 The DPDK contributors
 # Copyright 2023 Arm Limited
 
-executions:
-  # define one execution environment
+test_runs:
+  # define one test run environment
   - build_targets:
   - arch: x86_64
 os: linux
@@ -20,9 +20,9 @@ executions:
 # The machine running the DPDK test executable
 system_under_test_node:
   node_name: "SUT 1"
-  vdevs: # optional; if removed, vdevs won't be used in the execution
+  vdevs: # optional; if removed, vdevs won't be used in the test run
 - "crypto_openssl"
-# Traffic generator node to use for this execution environment
+# Traffic generator node to use for this test run
 traffic_generator_node: "TG 1"
 nodes:
   # Define a system under test node, having two network ports physically
diff --git a/dts/framework/config/__init__.py b/dts/framework/config/__init__.py
index 4cb5c74059..5faad1bf50 100644
--- a/dts/framework/config/__init__.py
+++ b/dts/framework/config/__init__.py
@@ -16,7 +16,7 @@
 
 The test run configuration has two main sections:
 
-* The :class:`ExecutionConfiguration` which defines what tests are going to be run
+* The :class:`TestRunConfiguration` which defines what tests are going to be run
   and how DPDK will be built. It also references the testbed where these tests and DPDK
   are going to be run,
 * The nodes of the testbed are defined in the other section,
@@ -46,9 +46,9 @@
 from framework.config.types import (
 BuildTargetConfigDict,
 ConfigurationDict,
-ExecutionConfigDict,
 NodeConfigDict,
 PortConfigDict,
+TestRunConfigDict,
 TestSuiteConfigDict,
 TrafficGeneratorConfigDict,
 )
@@ -428,8 +428,8 @@ def from_dict(
 
 
 @dataclass(slots=True, frozen=True)
-class ExecutionConfiguration:
-"""The configuration of an execution.
+class TestRunConfiguration:
+"""The configuration of a test run.
 
 The configuration contains testbed information, what tests to execute
 and with what DPDK build.
@@ -440,8 +440,8 @@ class ExecutionConfiguration:
 func: Whe

[PATCH v3 0/2] Wangxun support vector Rx/Tx

2024-04-19 Thread Jiawen Wu
Add SSE/NEON vector instructions for TXGBE and NGBE driver to process
packets.

v3:
- Update release note.
- Use spaces instead of tab in meson.build.

v2:
- Add performance test results.
- Cleanup codes and rebase.
- Remove GCC "-Wcast-qual".

Jiawen Wu (2):
  net/txgbe: add vectorized functions for Rx/Tx
  net/ngbe: add vectorized functions for Rx/Tx

 doc/guides/rel_notes/release_24_07.rst|   8 +
 drivers/net/ngbe/meson.build  |   6 +
 drivers/net/ngbe/ngbe_ethdev.c|   5 +
 drivers/net/ngbe/ngbe_ethdev.h|   1 +
 drivers/net/ngbe/ngbe_rxtx.c  | 162 -
 drivers/net/ngbe/ngbe_rxtx.h  |  32 +-
 drivers/net/ngbe/ngbe_rxtx_vec_common.h   | 296 +
 drivers/net/ngbe/ngbe_rxtx_vec_neon.c | 602 ++
 drivers/net/ngbe/ngbe_rxtx_vec_sse.c  | 688 
 drivers/net/txgbe/meson.build |   6 +
 drivers/net/txgbe/txgbe_ethdev.c  |   5 +
 drivers/net/txgbe/txgbe_ethdev.h  |   1 +
 drivers/net/txgbe/txgbe_ethdev_vf.c   |   1 +
 drivers/net/txgbe/txgbe_rxtx.c| 151 -
 drivers/net/txgbe/txgbe_rxtx.h|  18 +
 drivers/net/txgbe/txgbe_rxtx_vec_common.h | 301 +
 drivers/net/txgbe/txgbe_rxtx_vec_neon.c   | 602 ++
 drivers/net/txgbe/txgbe_rxtx_vec_sse.c| 732 ++
 18 files changed, 3607 insertions(+), 10 deletions(-)
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_common.h
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_neon.c
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_sse.c
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_common.h
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_neon.c
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_sse.c

-- 
2.27.0



[PATCH v3 1/2] net/txgbe: add vectorized functions for Rx/Tx

2024-04-19 Thread Jiawen Wu
To optimize Rx/Tx burst process, add SSE/NEON vector instructions on
x86/arm architecture.

The performance test results on the Phytium D2000 (ARM64) show that
throughput for 64-byte packets increased from 64.844% to 73.984%.

Signed-off-by: Jiawen Wu 
---
 doc/guides/rel_notes/release_24_07.rst|   4 +
 drivers/net/txgbe/meson.build |   6 +
 drivers/net/txgbe/txgbe_ethdev.c  |   5 +
 drivers/net/txgbe/txgbe_ethdev.h  |   1 +
 drivers/net/txgbe/txgbe_ethdev_vf.c   |   1 +
 drivers/net/txgbe/txgbe_rxtx.c| 151 -
 drivers/net/txgbe/txgbe_rxtx.h|  18 +
 drivers/net/txgbe/txgbe_rxtx_vec_common.h | 301 +
 drivers/net/txgbe/txgbe_rxtx_vec_neon.c   | 602 ++
 drivers/net/txgbe/txgbe_rxtx_vec_sse.c| 732 ++
 10 files changed, 1815 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_common.h
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_neon.c
 create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_sse.c

diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..6c125c5d4c 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -55,6 +55,10 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Wangxun txgbe driver.**
+
+  * Added SSE/NEON vector datapath.
+
 
 Removed Items
 -
diff --git a/drivers/net/txgbe/meson.build b/drivers/net/txgbe/meson.build
index 14729a6cf3..ba7167a511 100644
--- a/drivers/net/txgbe/meson.build
+++ b/drivers/net/txgbe/meson.build
@@ -24,6 +24,12 @@ sources = files(
 
 deps += ['hash', 'security']
 
+if arch_subdir == 'x86'
+sources += files('txgbe_rxtx_vec_sse.c')
+elif arch_subdir == 'arm'
+sources += files('txgbe_rxtx_vec_neon.c')
+endif
+
 includes += include_directories('base')
 
 install_headers('rte_pmd_txgbe.h')
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index b75e8898e2..4a23e840aa 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -1544,6 +1544,7 @@ txgbe_dev_configure(struct rte_eth_dev *dev)
 * allocation Rx preconditions we will reset it.
 */
adapter->rx_bulk_alloc_allowed = true;
+   adapter->rx_vec_allowed = true;
 
return 0;
 }
@@ -2730,6 +2731,10 @@ const uint32_t *
 txgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of_elements)
 {
if (dev->rx_pkt_burst == txgbe_recv_pkts ||
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+   dev->rx_pkt_burst == txgbe_recv_pkts_vec ||
+   dev->rx_pkt_burst == txgbe_recv_scattered_pkts_vec ||
+#endif
dev->rx_pkt_burst == txgbe_recv_pkts_lro_single_alloc ||
dev->rx_pkt_burst == txgbe_recv_pkts_lro_bulk_alloc ||
dev->rx_pkt_burst == txgbe_recv_pkts_bulk_alloc)
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index 7e8067ca05..609f4fb3f0 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -364,6 +364,7 @@ struct txgbe_adapter {
struct txgbe_ipsec  ipsec;
 #endif
bool rx_bulk_alloc_allowed;
+   bool rx_vec_allowed;
struct rte_timecounter  systime_tc;
struct rte_timecounter  rx_tstamp_tc;
struct rte_timecounter  tx_tstamp_tc;
diff --git a/drivers/net/txgbe/txgbe_ethdev_vf.c b/drivers/net/txgbe/txgbe_ethdev_vf.c
index f1341fbf7e..7d8327e7ad 100644
--- a/drivers/net/txgbe/txgbe_ethdev_vf.c
+++ b/drivers/net/txgbe/txgbe_ethdev_vf.c
@@ -603,6 +603,7 @@ txgbevf_dev_configure(struct rte_eth_dev *dev)
 * allocation or vector Rx preconditions we will reset it.
 */
adapter->rx_bulk_alloc_allowed = true;
+   adapter->rx_vec_allowed = true;
 
return 0;
 }
diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index 1cd4b25965..3b85eae85e 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "txgbe_logs.h"
 #include "base/txgbe.h"
@@ -314,6 +315,27 @@ txgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_tx;
 }
 
+static uint16_t
+txgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
+   uint16_t nb_tx = 0;
+
+   while (nb_pkts) {
+   uint16_t ret, num;
+
+   num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_free_thresh);
+   ret = txgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx], num);
+   nb_tx += ret;
+   nb_pkts -= ret;
+   if (ret < num)
+   break;
+   }
+
+   return nb_tx;
+}
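
The wrapper above splits an arbitrarily large burst into
tx_free_thresh-sized chunks and stops early when the fixed-burst
routine returns short (ring full). A standalone sketch of the same
pattern, with a stub standing in for txgbe_xmit_fixed_burst_vec():

#include <stdint.h>
#include <stdio.h>

static uint16_t ring_avail = 70;    /* pretend the ring has room for 70 */

static uint16_t fixed_burst_stub(uint16_t num)
{
	uint16_t sent = num < ring_avail ? num : ring_avail;

	ring_avail -= sent;
	return sent;
}

int main(void)
{
	const uint16_t tx_free_thresh = 32;
	uint16_t nb_pkts = 100, nb_tx = 0;

	while (nb_pkts) {
		uint16_t num = nb_pkts < tx_free_thresh ? nb_pkts : tx_free_thresh;
		uint16_t ret = fixed_burst_stub(num);

		nb_tx += ret;
		nb_pkts -= ret;
		if (ret < num)  /* ring full: report partial completion */
			break;
	}
	printf("transmitted %u of 100 packets\n", nb_tx);  /* 32+32+6 = 70 */
	return 0;
}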

[PATCH v3 2/2] net/ngbe: add vectorized functions for Rx/Tx

2024-04-19 Thread Jiawen Wu
To optimize Rx/Tx burst process, add SSE/NEON vector instructions on
x86/arm architecture.

The performance test results on the Phytium D2000 (ARM64) show that
throughput for 128-byte packets increased from 76.797% to 94.375%.

Signed-off-by: Jiawen Wu 
---
 doc/guides/rel_notes/release_24_07.rst  |   4 +
 drivers/net/ngbe/meson.build|   6 +
 drivers/net/ngbe/ngbe_ethdev.c  |   5 +
 drivers/net/ngbe/ngbe_ethdev.h  |   1 +
 drivers/net/ngbe/ngbe_rxtx.c| 162 +-
 drivers/net/ngbe/ngbe_rxtx.h|  32 +-
 drivers/net/ngbe/ngbe_rxtx_vec_common.h | 296 ++
 drivers/net/ngbe/ngbe_rxtx_vec_neon.c   | 602 +
 drivers/net/ngbe/ngbe_rxtx_vec_sse.c| 688 
 9 files changed, 1792 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_common.h
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_neon.c
 create mode 100644 drivers/net/ngbe/ngbe_rxtx_vec_sse.c

diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index 6c125c5d4c..5175764ae4 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -59,6 +59,10 @@ New Features
 
   * Added SSE/NEON vector datapath.
 
+* **Updated Wangxun ngbe driver.**
+
+  * Added SSE/NEON vector datapath.
+
 
 Removed Items
 -
diff --git a/drivers/net/ngbe/meson.build b/drivers/net/ngbe/meson.build
index 8b5195aab3..402cea1c13 100644
--- a/drivers/net/ngbe/meson.build
+++ b/drivers/net/ngbe/meson.build
@@ -19,4 +19,10 @@ sources = files(
 
 deps += ['hash']
 
+if arch_subdir == 'x86'
+sources += files('ngbe_rxtx_vec_sse.c')
+elif arch_subdir == 'arm'
+sources += files('ngbe_rxtx_vec_neon.c')
+endif
+
 includes += include_directories('base')
diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
index 4cd07a0030..c2e186c3d6 100644
--- a/drivers/net/ngbe/ngbe_ethdev.c
+++ b/drivers/net/ngbe/ngbe_ethdev.c
@@ -932,6 +932,7 @@ ngbe_dev_configure(struct rte_eth_dev *dev)
 * allocation Rx preconditions we will reset it.
 */
adapter->rx_bulk_alloc_allowed = true;
+   adapter->rx_vec_allowed = true;
 
return 0;
 }
@@ -1867,6 +1868,10 @@ const uint32_t *
 ngbe_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of_elements)
 {
if (dev->rx_pkt_burst == ngbe_recv_pkts ||
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+   dev->rx_pkt_burst == ngbe_recv_pkts_vec ||
+   dev->rx_pkt_burst == ngbe_recv_scattered_pkts_vec ||
+#endif
dev->rx_pkt_burst == ngbe_recv_pkts_sc_single_alloc ||
dev->rx_pkt_burst == ngbe_recv_pkts_sc_bulk_alloc ||
dev->rx_pkt_burst == ngbe_recv_pkts_bulk_alloc)
diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
index 70ed1920dd..c748bfbe4d 100644
--- a/drivers/net/ngbe/ngbe_ethdev.h
+++ b/drivers/net/ngbe/ngbe_ethdev.h
@@ -130,6 +130,7 @@ struct ngbe_adapter {
struct ngbe_vf_info*vfdata;
struct ngbe_uta_info   uta_info;
bool   rx_bulk_alloc_allowed;
+   bool   rx_vec_allowed;
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
diff --git a/drivers/net/ngbe/ngbe_rxtx.c b/drivers/net/ngbe/ngbe_rxtx.c
index 8a873b858e..e92f241e46 100644
--- a/drivers/net/ngbe/ngbe_rxtx.c
+++ b/drivers/net/ngbe/ngbe_rxtx.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ngbe_logs.h"
 #include "base/ngbe.h"
@@ -267,6 +268,27 @@ ngbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_tx;
 }
 
+static uint16_t
+ngbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts)
+{
+   struct ngbe_tx_queue *txq = (struct ngbe_tx_queue *)tx_queue;
+   uint16_t nb_tx = 0;
+
+   while (nb_pkts) {
+   uint16_t ret, num;
+
+   num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_free_thresh);
+   ret = ngbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx], num);
+   nb_tx += ret;
+   nb_pkts -= ret;
+   if (ret < num)
+   break;
+   }
+
+   return nb_tx;
+}
+
 static inline void
 ngbe_set_xmit_ctx(struct ngbe_tx_queue *txq,
volatile struct ngbe_tx_ctx_desc *ctx_txd,
@@ -1858,8 +1880,16 @@ ngbe_set_tx_function(struct rte_eth_dev *dev, struct ngbe_tx_queue *txq)
if (txq->offloads == 0 &&
txq->tx_free_thresh >= RTE_PMD_NGBE_TX_MAX_BURST) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
-   dev->tx_pkt_burst = ngbe_xmit_pkts_simple;
dev->tx_pkt_prepare = NULL;
+   if (txq->tx_free_thresh <= RTE_NGBE_TX_MAX_FREE_BUF_SZ &&
+   rte_vect_get_max_simd_bitwi