On Thu, Oct 22, 2020 at 11:51 AM Lance Richardson
<lance.richard...@broadcom.com> wrote:
>
> Make SIMD initialization code less verbose by using appropriate
> intrinsics when all lanes of a vector are initialized to the
> same value.
>
> Signed-off-by: Lance Richardson <lance.richard...@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khapa...@broadcom.com>

Patch applied to dpdk-next-net-brcm.

> --- > drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++-------------------- > drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 37 +++++------------ > 2 files changed, 23 insertions(+), 72 deletions(-) > > diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > index f49e29ccb..de1d96570 100644 > --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t > mm_rxcmp1[4], > 0xFF, 0xFF, /* vlan_tci (zeroes) */ > 12, 13, 14, 15 /* rss hash */ > }; > - const uint32x4_t flags_type_mask = { > - RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK > - }; > - const uint32x4_t flags2_mask1 = { > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC > - }; > - const uint32x4_t flags2_mask2 = { > - RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE > - }; > - const uint32x4_t rss_mask = { > - RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID > - }; > - const uint32x4_t flags2_index_mask = { > - 0x1F, 0x1F, 0x1F, 0x1F > - }; > - const uint32x4_t flags2_error_mask = { > - 0xF, 0xF, 0xF, 0xF > - }; > + const uint32x4_t flags_type_mask = > + vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK); > + const uint32x4_t flags2_mask1 = > + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); > + const uint32x4_t flags2_mask2 = > + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE); > + const uint32x4_t rss_mask = > + vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID); > + 
const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F); > + const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F); > uint32x4_t flags_type, flags2, index, errors, rss_flags; > uint32x4_t tmp, ptype_idx; > uint64x2_t t0, t1; > @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf > **rx_pkts, > uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size; > struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; > uint64_t valid, desc_valid_mask = ~0UL; > - const uint32x4_t info3_v_mask = { > - CMPL_BASE_V, CMPL_BASE_V, > - CMPL_BASE_V, CMPL_BASE_V > - }; > + const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V); > uint32_t raw_cons = cpr->cp_raw_cons; > uint32_t cons, mbcons; > int nb_rx_pkts = 0; > const uint64x2_t mb_init = {rxq->mbuf_initializer, 0}; > - const uint32x4_t valid_target = { > - !!(raw_cons & cp_ring_size), > - !!(raw_cons & cp_ring_size), > - !!(raw_cons & cp_ring_size), > - !!(raw_cons & cp_ring_size) > - }; > + const uint32x4_t valid_target = > + vdupq_n_u32(!!(raw_cons & cp_ring_size)); > int i; > > /* If Rx Q was stopped return */ > diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > index e4ba63551..e12bf8bb7 100644 > --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], > 0xFF, 0xFF, 3, 2, /* pkt_len */ > 0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */ > const __m128i flags_type_mask = > - _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK, > - RX_PKT_CMPL_FLAGS_ITYPE_MASK); > + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK); > const __m128i flags2_mask1 = > - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, > - 
RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); > + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | > + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); > const __m128i flags2_mask2 = > - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE, > - RX_PKT_CMPL_FLAGS2_IP_TYPE); > + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE); > const __m128i rss_mask = > - _mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID, > - RX_PKT_CMPL_FLAGS_RSS_VALID); > + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); > __m128i t0, t1, flags_type, flags2, index, errors, rss_flags; > __m128i ptype_idx; > uint32_t ol_flags; > @@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], > t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]); > > /* Compute ol_flags and checksum error indexes for four packets. */ > - flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F)); > + flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F)); > > errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4); > - errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF)); > + errors = _mm_and_si128(errors, _mm_set1_epi32(0xF)); > errors = _mm_and_si128(errors, flags2); > > index = _mm_andnot_si128(errors, flags2); > @@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf > **rx_pkts, > uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size; > struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; > uint64_t valid, desc_valid_mask = ~0ULL; > - const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V, > - CMPL_BASE_V, CMPL_BASE_V); > + const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V); > uint32_t raw_cons = cpr->cp_raw_cons; > uint32_t cons, mbcons; > int nb_rx_pkts = 0; > const __m128i valid_target = > - _mm_set_epi32(!!(raw_cons & cp_ring_size), > - !!(raw_cons & cp_ring_size), > - !!(raw_cons & cp_ring_size), > - !!(raw_cons & 
cp_ring_size)); > + _mm_set1_epi32(!!(raw_cons & cp_ring_size)); > int i; > > /* If Rx Q was stopped return */ > -- > 2.25.1 >