> -----Original Message----- > From: Lance Richardson <lance.richard...@broadcom.com> > Sent: Friday, July 9, 2021 3:15 AM > To: Ajit Khaparde (ajit.khapa...@broadcom.com) > <ajit.khapa...@broadcom.com>; Somnath Kotur > <somnath.ko...@broadcom.com>; Bruce Richardson > <bruce.richard...@intel.com>; Konstantin Ananyev > <konstantin.anan...@intel.com>; jer...@marvell.com; Ruifeng Wang > <ruifeng.w...@arm.com>; Stephen Hurd <stephen.h...@broadcom.com>; > David Christensen <david.christen...@broadcom.com> > Cc: dev@dpdk.org; sta...@dpdk.org > Subject: [PATCH] net/bnxt: fix missing barriers in completion handling > > Ensure that Rx/Tx/Async completion entry fields are accessed > only after the completion's valid flag has been loaded and > verified. This is needed for correct operation on systems that > use relaxed memory consistency models. > > Fixes: 2eb53b134aae ("net/bnxt: add initial Rx code") > Fixes: 6eb3cc2294fd ("net/bnxt: add initial Tx code") > Cc: sta...@dpdk.org > Signed-off-by: Lance Richardson <lance.richard...@broadcom.com> > Reviewed-by: Ajit Khaparde <ajit.khapa...@broadcom.com> > --- > drivers/net/bnxt/bnxt_cpr.h | 36 ++++++++++++++++++++++++--- > drivers/net/bnxt/bnxt_ethdev.c | 16 ++++++------ > drivers/net/bnxt/bnxt_irq.c | 7 +++--- > drivers/net/bnxt/bnxt_rxr.c | 9 ++++--- > drivers/net/bnxt/bnxt_rxtx_vec_avx2.c | 2 +- > drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 2 +- > drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 2 +- > drivers/net/bnxt/bnxt_txr.c | 2 +- > 8 files changed, 54 insertions(+), 22 deletions(-) > > diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h > index 2a56ec52c..3ee6b74bc 100644 > --- a/drivers/net/bnxt/bnxt_cpr.h > +++ b/drivers/net/bnxt/bnxt_cpr.h > @@ -8,13 +8,10 @@ > #include <stdbool.h> > > #include <rte_io.h> > +#include "hsi_struct_def_dpdk.h" > > struct bnxt_db_info; > > -#define CMP_VALID(cmp, raw_cons, ring) > \ > - (!!(rte_le_to_cpu_32(((struct cmpl_base *)(cmp))->info3_v) & > \ > - CMPL_BASE_V) == !((raw_cons) & ((ring)->ring_size))) > - > #define CMP_TYPE(cmp) \ > (((struct cmpl_base *)cmp)->type & CMPL_BASE_TYPE_MASK) > > @@ -121,4 +118,35 @@ bool bnxt_is_recovery_enabled(struct bnxt *bp); > bool bnxt_is_master_func(struct bnxt *bp); > > void bnxt_stop_rxtx(struct bnxt *bp); > + > +/** > + * Check validity of a completion ring entry. If the entry is valid, include > a > + * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields > in the > + * completion are not hoisted by the compiler or by the CPU to come before > the > + * loading of the "valid" field. > + * > + * Note: the caller must not access any fields in the specified completion > + * entry prior to calling this function. > + * > + * @param cmp Nit, cmpl
> + * Pointer to an entry in the completion ring. > + * @param raw_cons > + * Raw consumer index of entry in completion ring. > + * @param ring_size > + * Size of completion ring. > + */ > +static __rte_always_inline bool > +bnxt_cpr_cmp_valid(const void *cmpl, uint32_t raw_cons, uint32_t > ring_size) > +{ > + const struct cmpl_base *c = cmpl; > + bool expected, valid; > + > + expected = !(raw_cons & ring_size); > + valid = !!(rte_le_to_cpu_32(c->info3_v) & CMPL_BASE_V); > + if (valid == expected) { > + rte_atomic_thread_fence(__ATOMIC_ACQUIRE); > + return true; > + } > + return false; > +} > #endif > diff --git a/drivers/net/bnxt/bnxt_ethdev.c > b/drivers/net/bnxt/bnxt_ethdev.c > index ed09f1bf5..ee6929692 100644 > --- a/drivers/net/bnxt/bnxt_ethdev.c > +++ b/drivers/net/bnxt/bnxt_ethdev.c <snip> > > /* Check to see if hw has posted a completion for the descriptor. */ > @@ -3327,7 +3327,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, > uint16_t offset) > cons = RING_CMPL(ring_mask, raw_cons); > txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; > > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) cpr->cp_ring_struct->ring_size can be used instead of 'ring_mask + 1'? > break; > > if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2) <snip> > diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > index 263e6ec3c..13211060c 100644 > --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c > @@ -339,7 +339,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) > cons = RING_CMPL(ring_mask, raw_cons); > txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; > > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) Same here. I think cpr->cp_ring_struct->ring_size can be used and it avoids calculation. Also some places in other vector files. > break; > > if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)) > diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > index 9a53d1fba..6e5630532 100644 > --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c > @@ -320,7 +320,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) > cons = RING_CMPL(ring_mask, raw_cons); > txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; > > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) > break; > > if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)) > diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c > index 9a6b96e04..47824334a 100644 > --- a/drivers/net/bnxt/bnxt_txr.c > +++ b/drivers/net/bnxt/bnxt_txr.c > @@ -461,7 +461,7 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue > *txq) > cons = RING_CMPL(ring_mask, raw_cons); > txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons]; > > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) > break; > > opaque = rte_le_to_cpu_32(txcmp->opaque); > -- > 2.25.1