Stop using __builtin_ bit operations directly;
prefer the existing DPDK wrappers.

Note: this is a brute-force sed pass over all drivers (skipping base drivers)
for the __builtin_* calls that have a direct replacement in EAL bitops.
There is more work to do, such as adding some missing macros inspired by the
kernel FIELD_* macros, but this is left for later.

Signed-off-by: David Marchand <david.march...@redhat.com>
---
 drivers/common/nfp/nfp_platform.h       |  4 +++-
 drivers/dma/hisilicon/hisi_dmadev.h     |  3 ++-
 drivers/ml/cnxk/cn10k_ml_ocm.c          |  7 ++++---
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c   |  4 ++--
 drivers/net/bnxt/tf_ulp/ulp_flow_db.c   |  6 ++++--
 drivers/net/bnxt/tf_ulp/ulp_gen_hash.c  |  4 +++-
 drivers/net/bonding/rte_eth_bond_pmd.c  |  3 ++-
 drivers/net/cpfl/cpfl_flow_engine_fxp.c |  5 ++++-
 drivers/net/enetfec/enet_ethdev.c       |  5 +++--
 drivers/net/enetfec/enet_ethdev.h       |  6 ------
 drivers/net/hns3/hns3_rxtx_vec_neon.h   |  4 +++-
 drivers/net/i40e/i40e_rxtx_vec_neon.c   |  4 +++-
 drivers/net/iavf/iavf_rxtx_vec_neon.c   |  4 +++-
 drivers/net/mlx5/hws/mlx5dr_definer.c   |  8 +++++---
 drivers/net/mlx5/mlx5_flow_dv.c         |  3 ++-
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h   | 12 ++++++------
 drivers/net/mlx5/mlx5_tx.c              |  2 +-
 17 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/drivers/common/nfp/nfp_platform.h b/drivers/common/nfp/nfp_platform.h
index 1687942e41..0b02fcf1e8 100644
--- a/drivers/common/nfp/nfp_platform.h
+++ b/drivers/common/nfp/nfp_platform.h
@@ -8,6 +8,8 @@
 
 #include <stdint.h>
 
+#include <rte_bitops.h>
+
 #define DIV_ROUND_UP(n, d)             (((n) + (d) - 1) / (d))
 
 #define DMA_BIT_MASK(n)    ((1ULL << (n)) - 1)
@@ -21,7 +23,7 @@
 #define GENMASK_ULL(h, l) \
        ((~0ULL << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - (h) - 1)))
 
-#define __bf_shf(x) (__builtin_ffsll(x) - 1)
+#define __bf_shf(x) rte_bsf64(x)
 
 #define FIELD_GET(_mask, _reg) \
        (__extension__ ({ \
diff --git a/drivers/dma/hisilicon/hisi_dmadev.h b/drivers/dma/hisilicon/hisi_dmadev.h
index a57b5c759a..786fe3cc0e 100644
--- a/drivers/dma/hisilicon/hisi_dmadev.h
+++ b/drivers/dma/hisilicon/hisi_dmadev.h
@@ -5,6 +5,7 @@
 #ifndef HISI_DMADEV_H
 #define HISI_DMADEV_H
 
+#include <rte_bitops.h>
 #include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_memzone.h>
@@ -14,7 +15,7 @@
 #define BITS_PER_LONG  (__SIZEOF_LONG__ * 8)
 #define GENMASK(h, l) \
                (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-#define BF_SHF(x) (__builtin_ffsll(x) - 1)
+#define BF_SHF(x) rte_bsf64(x)
 #define FIELD_GET(mask, reg) \
                ((typeof(mask))(((reg) & (mask)) >> BF_SHF(mask)))
 
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
index 749ddeb344..0032fe82da 100644
--- a/drivers/ml/cnxk/cn10k_ml_ocm.c
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -2,6 +2,7 @@
  * Copyright (c) 2022 Marvell.
  */
 
+#include <rte_bitops.h>
 #include <rte_mldev_pmd.h>
 
 #include <roc_api.h>
@@ -203,11 +204,11 @@ cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int 
*end)
 
        PLT_ASSERT(tilemask != 0);
 
-       *start = __builtin_ctzl(tilemask);
-       *end = 64 - __builtin_clzl(tilemask) - 1;
+       *start = rte_ctz64(tilemask);
+       *end = 64 - rte_clz64(tilemask) - 1;
        count = *end - *start + 1;
 
-       PLT_ASSERT(count == __builtin_popcountl(tilemask));
+       PLT_ASSERT(count == rte_popcount64(tilemask));
        return count;
 }
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 355d41bbd3..840b21cef9 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -4,7 +4,7 @@
 #include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_bitmap.h>
+#include <rte_bitops.h>
 #include <rte_byteorder.h>
 #include <rte_malloc.h>
 #include <rte_memory.h>
@@ -290,7 +290,7 @@ recv_burst_vec_neon(void *rx_queue, struct rte_mbuf 
**rx_pkts, uint16_t nb_pkts)
                if (valid == 0)
                        num_valid = 4;
                else
-                       num_valid = __builtin_ctzl(valid) / 16;
+                       num_valid = rte_ctz64(valid) / 16;
 
                if (num_valid == 0)
                        break;
diff --git a/drivers/net/bnxt/tf_ulp/ulp_flow_db.c b/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
index 2e6ea43ac1..aac974a970 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
@@ -3,7 +3,9 @@
  * All rights reserved.
  */
 
+#include <rte_bitops.h>
 #include <rte_malloc.h>
+
 #include "bnxt.h"
 #include "bnxt_tf_common.h"
 #include "ulp_utils.h"
@@ -938,7 +940,7 @@ ulp_flow_db_next_entry_get(struct bnxt_ulp_flow_db *flow_db,
                 */
                if (s_idx == idx)
                        bs &= (-1UL >> mod_fid);
-               lfid = (idx * ULP_INDEX_BITMAP_SIZE) + __builtin_clzl(bs);
+               lfid = (idx * ULP_INDEX_BITMAP_SIZE) + rte_clz64(bs);
                if (*fid >= lfid) {
                        BNXT_TF_DBG(ERR, "Flow Database is corrupt\n");
                        return -ENOENT;
@@ -1480,7 +1482,7 @@ ulp_flow_db_parent_child_flow_next_entry_get(struct 
bnxt_ulp_flow_db *flow_db,
                 */
                if (s_idx == idx)
                        bs &= (-1UL >> mod_fid);
-               next_fid = (idx * ULP_INDEX_BITMAP_SIZE) + __builtin_clzl(bs);
+               next_fid = (idx * ULP_INDEX_BITMAP_SIZE) + rte_clz64(bs);
                if (*child_fid >= next_fid) {
                        BNXT_TF_DBG(ERR, "Parent Child Database is corrupt\n");
                        return -ENOENT;
diff --git a/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c b/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
index d746fbbd4e..9f27b56334 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
@@ -3,8 +3,10 @@
  * All rights reserved.
  */
 
+#include <rte_bitops.h>
 #include <rte_log.h>
 #include <rte_malloc.h>
+
 #include "bnxt_tf_common.h"
 #include "ulp_gen_hash.h"
 #include "ulp_utils.h"
@@ -25,7 +27,7 @@ int32_t ulp_bit_alloc_list_alloc(struct bit_alloc_list *blist,
 
        if (idx <= bsize_64) {
                if (bentry)
-                       jdx = __builtin_clzl(~bentry);
+                       jdx = rte_clz64(~bentry);
                *index = ((idx - 1) * ULP_INDEX_BITMAP_SIZE) + jdx;
                ULP_INDEX_BITMAP_SET(blist->bdata[(idx - 1)], jdx);
                return 0;
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index cda1c37124..91bf2c2345 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <netinet/in.h>
 
+#include <rte_bitops.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <ethdev_driver.h>
@@ -3982,7 +3983,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
                 * Two '1' in binary of 'link_speeds': bit0 and a unique
                 * speed bit.
                 */
-               if (__builtin_popcountl(link_speeds) != 2) {
+               if (rte_popcount64(link_speeds) != 2) {
                        RTE_BOND_LOG(ERR, "please set a unique speed.");
                        return -EINVAL;
                }
diff --git a/drivers/net/cpfl/cpfl_flow_engine_fxp.c b/drivers/net/cpfl/cpfl_flow_engine_fxp.c
index 2c75ea6577..0101c30911 100644
--- a/drivers/net/cpfl/cpfl_flow_engine_fxp.c
+++ b/drivers/net/cpfl/cpfl_flow_engine_fxp.c
@@ -10,6 +10,8 @@
 #include <unistd.h>
 #include <stdarg.h>
 #include <math.h>
+
+#include <rte_bitops.h>
 #include <rte_debug.h>
 #include <rte_ether.h>
 #include <rte_log.h>
@@ -20,6 +22,7 @@
 #include <rte_flow.h>
 #include <rte_bitmap.h>
 #include <ethdev_driver.h>
+
 #include "cpfl_rules.h"
 #include "cpfl_logs.h"
 #include "cpfl_ethdev.h"
@@ -608,7 +611,7 @@ cpfl_fxp_mod_idx_alloc(struct cpfl_adapter_ext *ad)
        if (!rte_bitmap_scan(ad->mod_bm, &pos, &slab))
                return CPFL_MAX_MOD_CONTENT_INDEX;
 
-       pos += __builtin_ffsll(slab) - 1;
+       pos += rte_bsf64(slab);
        rte_bitmap_clear(ad->mod_bm, pos);
 
        return pos;
diff --git a/drivers/net/enetfec/enet_ethdev.c b/drivers/net/enetfec/enet_ethdev.c
index 8c7067fbb5..4151d7fca3 100644
--- a/drivers/net/enetfec/enet_ethdev.c
+++ b/drivers/net/enetfec/enet_ethdev.c
@@ -6,6 +6,7 @@
 
 #include <ethdev_vdev.h>
 #include <ethdev_driver.h>
+#include <rte_bitops.h>
 #include <rte_io.h>
 
 #include "enet_pmd_logs.h"
@@ -374,7 +375,7 @@ enetfec_tx_queue_setup(struct rte_eth_dev *dev,
        unsigned int size;
        unsigned int dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
                sizeof(struct bufdesc);
-       unsigned int dsize_log2 = fls64(dsize);
+       unsigned int dsize_log2 = rte_fls_u64(dsize);
 
        /* Tx deferred start is not supported */
        if (tx_conf->tx_deferred_start) {
@@ -453,7 +454,7 @@ enetfec_rx_queue_setup(struct rte_eth_dev *dev,
        unsigned int size;
        unsigned int dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
                        sizeof(struct bufdesc);
-       unsigned int dsize_log2 = fls64(dsize);
+       unsigned int dsize_log2 = rte_fls_u64(dsize);
 
        /* Rx deferred start is not supported */
        if (rx_conf->rx_deferred_start) {
diff --git a/drivers/net/enetfec/enet_ethdev.h b/drivers/net/enetfec/enet_ethdev.h
index 02a3397890..4e196b8552 100644
--- a/drivers/net/enetfec/enet_ethdev.h
+++ b/drivers/net/enetfec/enet_ethdev.h
@@ -125,12 +125,6 @@ bufdesc *enet_get_nextdesc(struct bufdesc *bdp, struct 
bufdesc_prop *bd)
                : (struct bufdesc *)(((uintptr_t)bdp) + bd->d_size);
 }
 
-static inline int
-fls64(unsigned long word)
-{
-       return (64 - __builtin_clzl(word)) - 1;
-}
-
 static inline struct
 bufdesc *enet_get_prevdesc(struct bufdesc *bdp, struct bufdesc_prop *bd)
 {
diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index 0dc6b9f0a2..bbb5478015 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -5,6 +5,8 @@
 #ifndef HNS3_RXTX_VEC_NEON_H
 #define HNS3_RXTX_VEC_NEON_H
 
+#include <rte_bitops.h>
+
 #include <arm_neon.h>
 
 #pragma GCC diagnostic ignored "-Wcast-qual"
@@ -189,7 +191,7 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
                if (likely(stat == 0))
                        bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
                else
-                       bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
+                       bd_valid_num = rte_ctz64(stat) / HNS3_UINT16_BIT;
                if (bd_valid_num == 0)
                        break;
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 3a99137b5e..e1c5c7041b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -4,7 +4,9 @@
  */
 
 #include <stdint.h>
+
 #include <ethdev_driver.h>
+#include <rte_bitops.h>
 #include <rte_malloc.h>
 #include <rte_vect.h>
 
@@ -558,7 +560,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
                if (unlikely(stat == 0)) {
                        nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
                } else {
-                       nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
+                       nb_pkts_recd += rte_ctz64(stat) / I40E_UINT16_BIT;
                        break;
                }
        }
diff --git a/drivers/net/iavf/iavf_rxtx_vec_neon.c b/drivers/net/iavf/iavf_rxtx_vec_neon.c
index 20b656e899..04be574683 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_neon.c
@@ -4,7 +4,9 @@
  */
 
 #include <stdint.h>
+
 #include <ethdev_driver.h>
+#include <rte_bitops.h>
 #include <rte_malloc.h>
 #include <rte_vect.h>
 
@@ -366,7 +368,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
                if (unlikely(stat == 0)) {
                        nb_pkts_recd += IAVF_VPMD_DESCS_PER_LOOP;
                } else {
-                       nb_pkts_recd += __builtin_ctzl(stat) / IAVF_UINT16_BIT;
+                       nb_pkts_recd += rte_ctz64(stat) / IAVF_UINT16_BIT;
                        break;
                }
        }
diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c
index a9fa5d06ed..5c2e889444 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -2,6 +2,8 @@
  * Copyright (c) 2022 NVIDIA Corporation & Affiliates
  */
 
+#include <rte_bitops.h>
+
 #include "mlx5dr_internal.h"
 
 #define GTP_PDU_SC     0x85
@@ -1548,7 +1550,7 @@ mlx5dr_definer_conv_item_port(struct 
mlx5dr_definer_conv_data *cd,
                fc->tag_set = &mlx5dr_definer_vport_set;
                fc->tag_mask_set = &mlx5dr_definer_ones_set;
                DR_CALC_SET_HDR(fc, registers, register_c_0);
-               fc->bit_off = __builtin_ctz(caps->wire_regc_mask);
+               fc->bit_off = rte_ctz32(caps->wire_regc_mask);
                fc->bit_mask = caps->wire_regc_mask >> fc->bit_off;
                fc->dr_ctx = cd->ctx;
        } else {
@@ -2666,8 +2668,8 @@ mlx5dr_definer_conv_item_geneve_opt(struct 
mlx5dr_definer_conv_data *cd,
                fc->item_idx = item_idx;
                fc->tag_set = &mlx5dr_definer_ones_set;
                fc->byte_off = hl_ok_bit->dw_offset * DW_SIZE +
-                               __builtin_clz(hl_ok_bit->dw_mask) / 8;
-               fc->bit_off = __builtin_ctz(hl_ok_bit->dw_mask);
+                               rte_clz32(hl_ok_bit->dw_mask) / 8;
+               fc->bit_off = rte_ctz32(hl_ok_bit->dw_mask);
                fc->bit_mask = 0x1;
        }
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 201e215e4b..040727f2e8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -8,6 +8,7 @@
 #include <string.h>
 #include <unistd.h>
 
+#include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
@@ -9068,7 +9069,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct 
rte_flow_attr *attr,
                    !(non_shared_age && count) &&
                    (attr->group || (attr->transfer && priv->fdb_def_rule)) &&
                    priv->sh->flow_hit_aso_en);
-       if (__builtin_popcountl(aso_mask) > 1)
+       if (rte_popcount64(aso_mask) > 1)
                return rte_flow_error_set(error, ENOTSUP, 
RTE_FLOW_ERROR_TYPE_ACTION,
                                          NULL, "unsupported combining AGE, 
METER, CT ASO actions in a single rule");
        /*
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 510f60b25d..0ce9827ed9 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <arm_neon.h>
 
+#include <rte_bitops.h>
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_prefetch.h>
@@ -620,7 +621,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct 
mlx5_cqe *cq,
 
        /*
         * Note that vectors have reverse order - {v3, v2, v1, v0}, because
-        * there's no instruction to count trailing zeros. __builtin_clzl() is
+        * there's no instruction to count trailing zeros. rte_clz64() is
         * used instead.
         *
         * A. copy 4 mbuf pointers from elts ring to returning pkts.
@@ -808,13 +809,12 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile 
struct mlx5_cqe *cq,
                /* E.2 mask out invalid entries. */
                comp_mask = vbic_u16(comp_mask, invalid_mask);
                /* E.3 get the first compressed CQE. */
-               comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
-                                         comp_mask), 0)) /
-                                         (sizeof(uint16_t) * 8);
+               comp_idx = 
rte_clz64(vget_lane_u64(vreinterpret_u64_u16(comp_mask), 0)) /
+                       (sizeof(uint16_t) * 8);
                invalid_mask = vorr_u16(invalid_mask, comp_mask);
                /* D.7 count non-compressed valid CQEs. */
-               n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
-                                  invalid_mask), 0)) / (sizeof(uint16_t) * 8);
+               n = rte_clz64(vget_lane_u64(vreinterpret_u64_u16(invalid_mask), 
0)) /
+                       (sizeof(uint16_t) * 8);
                nocmp_n += n;
                /*
                 * D.2 mask out entries after the compressed CQE.
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 04f80bb9bd..fc105970a3 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -619,7 +619,7 @@ mlx5_select_tx_function(struct rte_eth_dev *dev)
                 * Check whether it has minimal amount
                 * of not requested offloads.
                 */
-               tmp = __builtin_popcountl(tmp & ~olx);
+               tmp = rte_popcount64(tmp & ~olx);
                if (m >= RTE_DIM(txoff_func) || tmp < diff) {
                        /* First or better match, save and continue. */
                        m = i;
-- 
2.46.2

Reply via email to