On Tue, Apr 16, 2019 at 5:10 PM Xiaolong Ye <xiaolong...@intel.com> wrote:
> As David pointed out, if we reserve N slots for Tx but only submit n
> slots, we would end up with an incorrect opinion of the number of
> available slots later, and we would also get a wrong idx the next time
> we call xsk_ring_prod__reserve. The same applies to
> xsk_ring_cons__peek()/xsk_ring_cons__release().
>
> This patch ensures that both reserve/submit and peek/release are
> consistent.
>
> Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD")
>
> Suggested-by: David Marchand <david.march...@redhat.com>
> Signed-off-by: Xiaolong Ye <xiaolong...@intel.com>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 77 +++++++++++++++--------------
>  1 file changed, 39 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 5cc643ce2..b00cd6e03 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -134,30 +134,34 @@ static const struct rte_eth_link pmd_link = {
>  };
>
>  static inline int
> -reserve_fill_queue(struct xsk_umem_info *umem, int reserve_size)
> +reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
>  {
>         struct xsk_ring_prod *fq = &umem->fq;
> +       void *addrs[reserve_size];
>         uint32_t idx;
> -       int i, ret;
> +       uint16_t i;
> +
> +       if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
> +                       != reserve_size) {
> +               AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
> +               return -1;
> +       }
>
> -       ret = xsk_ring_prod__reserve(fq, reserve_size, &idx);
> -       if (unlikely(!ret)) {
> +       if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
>                 AF_XDP_LOG(ERR, "Failed to reserve enough fq descs.\n");

Is this situation happening a lot? If this is the case, I would prefer
to see this as a DEBUG message rather than ERR.

> -               return ret;
> +               rte_ring_enqueue_bulk(umem->buf_ring, addrs,
> +                               reserve_size, NULL);
> +               return -1;
>         }
>
>         for (i = 0; i < reserve_size; i++) {
>                 __u64 *fq_addr;
> -               void *addr = NULL;
> -               if (rte_ring_dequeue(umem->buf_ring, &addr)) {
> -                       i--;
> -                       break;
> -               }
> +
>                 fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
> -               *fq_addr = (uint64_t)addr;
> +               *fq_addr = (uint64_t)addrs[i];
>         }
>
> -       xsk_ring_prod__submit(fq, i);
> +       xsk_ring_prod__submit(fq, reserve_size);
>
>         return 0;
>  }
> @@ -174,21 +178,20 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         struct rte_mbuf *mbufs[ETH_AF_XDP_TX_BATCH_SIZE];
>         unsigned long dropped = 0;
>         unsigned long rx_bytes = 0;
> -       uint16_t count = 0;
>         int rcvd, i;
>
>         nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);

Nothing to do with your patch, but it should be ETH_AF_XDP_RX_BATCH_SIZE
(we have two occurrences of this in eth_af_xdp_rx). I can send a
follow-up patch after yours. Or you do it, your choice :-)

> +       if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
> +               return 0;
> +
>         rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
>         if (rcvd == 0)
> -               return 0;
> +               goto out;
>
>         if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
>                 (void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
>
> -       if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, rcvd) != 0))
> -               return 0;
> -
>         for (i = 0; i < rcvd; i++) {
>                 const struct xdp_desc *desc;
>                 uint64_t addr;
> @@ -204,7 +207,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>                 rte_pktmbuf_pkt_len(mbufs[i]) = len;
>                 rte_pktmbuf_data_len(mbufs[i]) = len;
>                 rx_bytes += len;
> -               bufs[count++] = mbufs[i];
> +               bufs[i] = mbufs[i];
>
>                 rte_ring_enqueue(umem->buf_ring, (void *)addr);
>         }
> @@ -215,7 +218,12 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         rxq->stats.rx_pkts += (rcvd - dropped);
>         rxq->stats.rx_bytes += rx_bytes;
>
> -       return count;
> + out:

No space before the label.

> +       if (rcvd != nb_pkts)
> +               rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd],
> +                                    nb_pkts - rcvd);
> +
> +       return rcvd;
>  }
>
>  static void
> @@ -262,7 +270,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         struct rte_mbuf *mbuf;
>         void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
>         unsigned long tx_bytes = 0;
> -       int i, valid = 0;
> +       int i;
>         uint32_t idx_tx;
>
>         nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
> @@ -283,20 +291,18 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         for (i = 0; i < nb_pkts; i++) {
>                 struct xdp_desc *desc;
>                 void *pkt;
> -               uint32_t buf_len = ETH_AF_XDP_FRAME_SIZE
> -                                       - ETH_AF_XDP_DATA_HEADROOM;
> +
>                 desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>                 mbuf = bufs[i];
> -               if (mbuf->pkt_len <= buf_len) {
> -                       desc->addr = (uint64_t)addrs[valid];
> -                       desc->len = mbuf->pkt_len;
> -                       pkt = xsk_umem__get_data(umem->mz->addr,
> -                                                desc->addr);
> -                       rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -                                  desc->len);
> -                       valid++;
> -                       tx_bytes += mbuf->pkt_len;
> -               }
> +
> +               desc->addr = (uint64_t)addrs[i];
> +               desc->len = mbuf->pkt_len;
> +               pkt = xsk_umem__get_data(umem->mz->addr,
> +                                        desc->addr);
> +               rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> +                          desc->len);
> +               tx_bytes += mbuf->pkt_len;
> +
>                 rte_pktmbuf_free(mbuf);
>         }
>

I can see that the buffers in umem can embed ETH_AF_XDP_FRAME_SIZE -
ETH_AF_XDP_DATA_HEADROOM bytes, and this driver does not support multi
segment, so we are missing a check in eth_dev_mtu_set(). The min_mtu /
max_mtu fields are not filled in eth_dev_info() either. You should fix
this in a preparation patch before this change.
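To make the expectation concrete, something along these lines would do
(an untested sketch; the min_mtu/max_mtu field names and the
ETHER_MIN_MTU spelling are from memory, so please double-check them
against rte_ethdev.h/rte_ether.h of the current tree):

/* Untested sketch: advertise the real limits in dev_info so that the
 * generic ethdev layer can validate MTU requests against them. */
static void
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        /* ... existing dev_info fills kept as they are ... */
        dev_info->min_mtu = ETHER_MIN_MTU;
        dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE -
                            ETH_AF_XDP_DATA_HEADROOM;
}

/* Untested sketch: refuse any MTU that does not fit in a single umem
 * buffer, since the driver copies each packet into one fixed-size
 * buffer and does not support multi-segment.  Strictly speaking, the
 * L2 header overhead should be subtracted from the budget as well. */
static int
eth_dev_mtu_set(struct rte_eth_dev *dev __rte_unused, uint16_t mtu)
{
        if (mtu > ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM)
                return -EINVAL;
        return 0;
}

plus hooking the latter up as .mtu_set in eth_dev_ops.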
> @@ -304,12 +310,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>
>         kick_tx(txq);
>
> -       if (valid < nb_pkts)
> -               rte_ring_enqueue_bulk(umem->buf_ring, &addrs[valid],
> -                                     nb_pkts - valid, NULL);
> -
> -       txq->stats.err_pkts += nb_pkts - valid;

The err_pkts stat is not used anymore, afaics.

> -       txq->stats.tx_pkts += valid;
> +       txq->stats.tx_pkts += nb_pkts;
>         txq->stats.tx_bytes += tx_bytes;
>
>         return nb_pkts;
> --
> 2.17.1
>


--
David Marchand