> -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of WangDong > Sent: Sunday, June 28, 2015 4:23 PM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH 2/2] ixgbe:replace compiler memory barrier and > rte_wmb with rte_dma_rmb and rte_dma_wmb. > > --- > drivers/net/ixgbe/ixgbe_rxtx.c | 30 +++++++++--------------------- > drivers/net/ixgbe/ixgbe_rxtx_vec.c | 3 +++ > 2 files changed, 12 insertions(+), 21 deletions(-) > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c > index 3ace8a8..3316488 100644 > --- a/drivers/net/ixgbe/ixgbe_rxtx.c > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c > @@ -130,6 +130,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq) > > /* check DD bit on threshold descriptor */ > status = txq->tx_ring[txq->tx_next_dd].wb.status; > + rte_dma_rmb(); > if (! (status & IXGBE_ADVTXD_STAT_DD)) > return 0;
Could you explain why we need an rmb here for a weak ordering model? We don't read the rest of the TXD later, so nothing could be reordered here. > > @@ -320,7 +321,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, > txq->tx_tail = 0; > > /* update tail pointer */ > - rte_wmb(); > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail); > > return nb_pkts; > @@ -841,7 +842,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, > txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len); > } > end_of_tx: > - rte_wmb(); > > /* > * Set the Transmit Descriptor Tail (TDT) > @@ -849,6 +849,7 @@ end_of_tx: > PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", > (unsigned) txq->port_id, (unsigned) txq->queue_id, > (unsigned) tx_id, (unsigned) nb_tx); > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); > txq->tx_tail = tx_id; > > @@ -975,6 +976,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq) > > /* Compute how many status bits were set */ > nb_dd = 0; > + rte_dma_rmb(); I think that's a bit too late for rmb() here. We need to preserve the order of reading all 8 statuses, so I am afraid we need to: /* Read desc statuses backwards to avoid race condition */ -for (j = LOOK_AHEAD-1; j >= 0; --j) +for (j = LOOK_AHEAD-1; j >= 0; --j) { + rte_dma_rmb(); s[j] = rxdp[j].wb.upper.status_error; +} > for (j = 0; j < LOOK_AHEAD; ++j) > nb_dd += s[j] & IXGBE_RXDADV_STAT_DD; > > @@ -1138,7 +1140,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, > } > > /* update tail pointer */ > - rte_wmb(); > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger); > } > > @@ -1229,13 +1231,10 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, > /* > * The order of operations here is important as the DD status > * bit must not be read after any other descriptor fields. > - * rx_ring and rxdp are pointing to volatile data so the order > - * of accesses cannot be reordered by the compiler.
If they were > - * not volatile, they could be reordered which could lead to > - * using invalid descriptor fields when read from rxd. > */ > rxdp = &rx_ring[rx_id]; > staterr = rxdp->wb.upper.status_error; > + rte_dma_rmb(); > if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) > break; > rxd = *rxdp; > @@ -1373,6 +1372,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, > (unsigned) nb_rx); > rx_id = (uint16_t) ((rx_id == 0) ? > (rxq->nb_rx_desc - 1) : (rx_id - 1)); > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); > nb_hold = 0; > } > @@ -1494,17 +1494,6 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf > **rx_pkts, uint16_t nb_pkts, > > next_desc: > /* > - * The code in this whole file uses the volatile pointer to > - * ensure the read ordering of the status and the rest of the > - * descriptor fields (on the compiler level only!!!). This is so > - * UGLY - why not to just use the compiler barrier instead? DPDK > - * even has the rte_compiler_barrier() for that. > - * > - * But most importantly this is just wrong because this doesn't > - * ensure memory ordering in a general case at all. For > - * instance, DPDK is supposed to work on Power CPUs where > - * compiler barrier may just not be enough! > - * > * I tried to write only this function properly to have a > * starting point (as a part of an LRO/RSC series) but the > * compiler cursed at me when I tried to cast away the > @@ -1519,12 +1508,11 @@ next_desc: > * TODO: > * - Get rid of "volatile" crap and let the compiler do its > * job. > - * - Use the proper memory barrier (rte_rmb()) to ensure the > - * memory ordering below. 
> */ > rxdp = &rx_ring[rx_id]; > staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error); > > + rte_dma_rmb(); > if (!(staterr & IXGBE_RXDADV_STAT_DD)) > break; > > @@ -1704,7 +1692,7 @@ next_desc: > "nb_hold=%u nb_rx=%u", > rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx); > > - rte_wmb(); > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id); > nb_hold = 0; > } I think you missed one more wmb() in that function: ixgbe_recv_pkts_lro(...) { ... } else if (nb_hold > rxq->rx_free_thresh) { uint16_t next_rdt = rxq->rx_free_trigger; if (!ixgbe_rx_alloc_bufs(rxq, false)) { rte_wmb(); IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, next_rdt); nb_hold -= rxq->rx_free_thresh; } else { > diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c > b/drivers/net/ixgbe/ixgbe_rxtx_vec.c > index abd10f6..af4d779 100644 > --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c > +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c In fact, I think there is not much point in modifying that one. Vector routines use IA-specific intrinsics, so that code wouldn't work on any other architecture anyway. > @@ -123,6 +123,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq) > (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); > > /* Update the tail pointer on the NIC */ > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); > } > > @@ -528,6 +529,7 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq) > > /* check DD bit on threshold descriptor */ > status = txq->tx_ring[txq->tx_next_dd].wb.status; > + rte_dma_rmb(); > if (!(status & IXGBE_ADVTXD_STAT_DD)) > return 0; Again, as with its scalar counterpart, I don't think we need rmb here. We read only the status from one TXD, that's it. But as I said above, there is probably no need to touch that file at all. Konstantin > > @@ -645,6 +647,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf > **tx_pkts, > > txq->tx_tail = tx_id; > > + rte_dma_wmb(); > IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail); > > return nb_pkts; > -- > 2.1.0