On Fri, Nov 18, 2016 at 04:52:13AM -0800, Satha Rao wrote: > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable > results. To solve this include rte memory barriers > > Signed-off-by: Satha Rao <skoteshwar at caviumnetworks.com> > --- > drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/net/i40e/base/i40e_osdep.h > b/drivers/net/i40e/base/i40e_osdep.h > index 38e7ba5..ffa3160 100644 > --- a/drivers/net/i40e/base/i40e_osdep.h > +++ b/drivers/net/i40e/base/i40e_osdep.h > @@ -158,7 +158,13 @@ do { > \ > ((volatile uint32_t *)((char *)(a)->hw_addr + (reg))) > static inline uint32_t i40e_read_addr(volatile void *addr) > { > +#if defined(RTE_ARCH_ARM64) > + uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr)); > + rte_rmb(); > + return val; > +#else > return rte_le_to_cpu_32(I40E_PCI_REG(addr)); > +#endif > } > #define I40E_PCI_REG_WRITE(reg, value) \ > do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0) > @@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void > *addr) > I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value)) > > #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg))) > +#if defined(RTE_ARCH_ARM64) > +#define wr32(a, reg, value) \ > + do { \ > + I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \ > + rte_wmb(); \ > + } while (0) > +#else > #define wr32(a, reg, value) \ > I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)) > +#endif > #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT))) > > #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) > --
Would rte_smp_*mb() functions allow you to get a similar result without the need for #ifdefs? It should be a full barrier on weakly ordered platforms which just a compiler barrier on IA. /Bruce