When scanning the rx ring, it is possible for the loop to break at the end of the ring while there are still received packets waiting at the head of the queue. Wrap the descriptor and sw_ring pointers back to the start of the ring in that case, so the remaining packets are not left behind.
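For illustration only, a minimal stand-alone sketch of the intended batch-receive behaviour with wrap-around at the ring end (RING_SIZE, DESCS_PER_LOOP, recv_burst and the status array are hypothetical simplifications, not the driver's own code):

/*
 * Sketch: consume a ring in fixed-size batches and wrap back to entry 0
 * when the cursor reaches the physical end of the ring, instead of
 * stopping there.  Assumes tail stays aligned to DESCS_PER_LOOP.
 */
#include <stdio.h>

#define RING_SIZE      16   /* power of two, like nb_rx_desc */
#define DESCS_PER_LOOP  4   /* batch size, like RTE_IXGBE_DESCS_PER_LOOP */

/* 1 = descriptor done (packet ready), 0 = not yet written by hardware */
static int ring[RING_SIZE];

static unsigned int
recv_burst(unsigned int tail, unsigned int max_pkts)
{
	unsigned int recd = 0;

	while (recd < max_pkts) {
		unsigned int batch = 0;

		/* count done descriptors in this batch (the SSE code does
		 * this with a popcount over the packed status bits) */
		while (batch < DESCS_PER_LOOP && ring[tail + batch])
			batch++;

		if (batch == 0)
			break;          /* nothing more to receive */

		recd += batch;
		tail += batch;

		if (tail == RING_SIZE)
			tail = 0;       /* wrap: keep going at the ring head */
		else if (batch < DESCS_PER_LOOP)
			break;          /* partial batch, ring is drained */
	}

	printf("received %u packets, new tail %u\n", recd, tail);
	return recd;
}

int main(void)
{
	unsigned int i;

	/* packets ready in the last 4 and the first 2 slots: without the
	 * wrap the loop would stop at the ring end and miss the first 2 */
	for (i = RING_SIZE - 4; i < RING_SIZE; i++)
		ring[i] = 1;
	ring[0] = ring[1] = 1;

	recv_burst(RING_SIZE - 4, 8);   /* expect 6 packets */
	return 0;
}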
Signed-off-by: Jianbo Liu <jianbo.liu at linaro.org>
---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 68 +++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index ccd93c7..611e431 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -206,10 +206,9 @@ static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union ixgbe_adv_rx_desc *rxdp;
+	volatile union ixgbe_adv_rx_desc *rxdp, *rxdp_end;
 	struct ixgbe_rx_entry *sw_ring;
-	uint16_t nb_pkts_recd;
-	int pos;
+	uint16_t rev;
 	uint64_t var;
 	__m128i shuf_msk;
 	__m128i crc_adjust = _mm_set_epi16(
@@ -232,6 +231,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles */
 	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp_end = rxq->rx_ring + rxq->nb_rx_desc;
 
 	_mm_prefetch((const void *)rxdp, _MM_HINT_T0);
 
@@ -275,9 +275,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * [C*. extract the end-of-packet bit, if requested]
 	 * D. fill info. from desc to mbuf
 	 */
-	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_IXGBE_DESCS_PER_LOOP,
-			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
+	for (rev = 0; rev < nb_pkts; ) {
 		__m128i descs0[RTE_IXGBE_DESCS_PER_LOOP];
 		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
 		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
@@ -285,17 +283,17 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */
 
 		/* B.1 load 1 mbuf point */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[0]);
 
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load 4 pkts desc */
 		descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 
 		/* B.2 copy 2 mbuf point into rx_pkts */
-		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+		_mm_storeu_si128((__m128i *)&rx_pkts[rev], mbp1);
 
 		/* B.1 load 1 mbuf point */
-		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[2]);
 
 		descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
 		/* B.1 load 2 mbuf point */
@@ -303,13 +301,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
 		/* B.2 copy 2 mbuf point into rx_pkts */
-		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
+		_mm_storeu_si128((__m128i *)&rx_pkts[rev + 2], mbp2);
 
 		if (split_packet) {
-			rte_prefetch0(&rx_pkts[pos]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 1]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 2]->cacheline1);
+			rte_prefetch0(&rx_pkts[rev + 3]->cacheline1);
 		}
 
 		/* A* mask out 0~3 bits RSS type */
@@ -333,7 +331,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
 
 		/* set ol_flags with vlan packet type */
-		desc_to_olflags_v(descs0, &rx_pkts[pos]);
+		desc_to_olflags_v(descs0, &rx_pkts[rev]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
 		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
@@ -348,9 +346,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
 
 		/* D.3 copy final 3,4 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+3]->rx_descriptor_fields1,
 				pkt_mb4);
-		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+2]->rx_descriptor_fields1,
 				pkt_mb3);
 
 		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
@@ -375,13 +373,12 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
-			rx_pkts[pos]->next = NULL;
-			rx_pkts[pos + 1]->next = NULL;
-			rx_pkts[pos + 2]->next = NULL;
-			rx_pkts[pos + 3]->next = NULL;
+			rx_pkts[rev]->next = NULL;
+			rx_pkts[rev + 1]->next = NULL;
+			rx_pkts[rev + 2]->next = NULL;
+			rx_pkts[rev + 3]->next = NULL;
 		}
 
 		/* C.3 calc available number of desc */
@@ -389,24 +386,35 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		staterr = _mm_packs_epi32(staterr, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev+1]->rx_descriptor_fields1,
 				pkt_mb2);
-		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+		_mm_storeu_si128((void *)&rx_pkts[rev]->rx_descriptor_fields1,
 				pkt_mb1);
 
 		/* C.4 calc avaialbe number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-		nb_pkts_recd += var;
-		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
+		if (unlikely(var == 0))
 			break;
+		else {
+			if (split_packet)
+				split_packet += var;
+
+			rev += var;
+			sw_ring += var;
+			rxdp += var;
+			if (rxdp == rxdp_end) {
+				sw_ring = rxq->sw_ring;
+				rxdp = rxq->rx_ring;
+			} else if (var < RTE_IXGBE_DESCS_PER_LOOP)
+				break;
+		}
 	}
 
 	/* Update our internal tail pointer */
-	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
-	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
-	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+	rxq->rx_tail = rxdp - rxq->rx_ring;
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + rev);
 
-	return nb_pkts_recd;
+	return rev;
 }
 
 /*
-- 
1.8.3.1