When processing the rx ring, the loop may break at the ring's tail even
though there are still packets pending at the head of the queue.

Signed-off-by: Jianbo Liu <jianbo.liu@linaro.org>
---
 drivers/net/ixgbe/ixgbe_rxtx_vec.c | 68 +++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index ccd93c7..611e431 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -206,10 +206,9 @@ static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts, uint8_t *split_packet)
 {
-       volatile union ixgbe_adv_rx_desc *rxdp;
+       volatile union ixgbe_adv_rx_desc *rxdp, *rxdp_end;
        struct ixgbe_rx_entry *sw_ring;
-       uint16_t nb_pkts_recd;
-       int pos;
+       uint16_t rev;
        uint64_t var;
        __m128i shuf_msk;
        __m128i crc_adjust = _mm_set_epi16(
@@ -232,6 +231,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        /* Just the act of getting into the function from the application is
         * going to cost about 7 cycles */
        rxdp = rxq->rx_ring + rxq->rx_tail;
+       rxdp_end = rxq->rx_ring + rxq->nb_rx_desc;

        _mm_prefetch((const void *)rxdp, _MM_HINT_T0);

@@ -275,9 +275,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
         * [C*. extract the end-of-packet bit, if requested]
         * D. fill info. from desc to mbuf
         */
-       for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-                       pos += RTE_IXGBE_DESCS_PER_LOOP,
-                       rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
+       for (rev = 0; rev < nb_pkts; ) {
                __m128i descs0[RTE_IXGBE_DESCS_PER_LOOP];
                __m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
                __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
@@ -285,17 +283,17 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                __m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */

                /* B.1 load 1 mbuf point */
-               mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+               mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[0]);

                /* Read desc statuses backwards to avoid race condition */
                /* A.1 load 4 pkts desc */
                descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));

                /* B.2 copy 2 mbuf point into rx_pkts  */
-               _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+               _mm_storeu_si128((__m128i *)&rx_pkts[rev], mbp1);

                /* B.1 load 1 mbuf point */
-               mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
+               mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[2]);

                descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
                /* B.1 load 2 mbuf point */
@@ -303,13 +301,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));

                /* B.2 copy 2 mbuf point into rx_pkts  */
-               _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
+               _mm_storeu_si128((__m128i *)&rx_pkts[rev + 2], mbp2);

                if (split_packet) {
-                       rte_prefetch0(&rx_pkts[pos]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+                       rte_prefetch0(&rx_pkts[rev]->cacheline1);
+                       rte_prefetch0(&rx_pkts[rev + 1]->cacheline1);
+                       rte_prefetch0(&rx_pkts[rev + 2]->cacheline1);
+                       rte_prefetch0(&rx_pkts[rev + 3]->cacheline1);
                }

                /* A* mask out 0~3 bits RSS type */
@@ -333,7 +331,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);

                /* set ol_flags with vlan packet type */
-               desc_to_olflags_v(descs0, &rx_pkts[pos]);
+               desc_to_olflags_v(descs0, &rx_pkts[rev]);

                /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
                pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
@@ -348,9 +346,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

                /* D.3 copy final 3,4 data to rx_pkts */
-               _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
+               _mm_storeu_si128((void *)&rx_pkts[rev+3]->rx_descriptor_fields1,
                                pkt_mb4);
-               _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
+               _mm_storeu_si128((void *)&rx_pkts[rev+2]->rx_descriptor_fields1,
                                pkt_mb3);

                /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
@@ -375,13 +373,12 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                        eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
                        /* store the resulting 32-bit value */
                        *(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-                       split_packet += RTE_IXGBE_DESCS_PER_LOOP;

                        /* zero-out next pointers */
-                       rx_pkts[pos]->next = NULL;
-                       rx_pkts[pos + 1]->next = NULL;
-                       rx_pkts[pos + 2]->next = NULL;
-                       rx_pkts[pos + 3]->next = NULL;
+                       rx_pkts[rev]->next = NULL;
+                       rx_pkts[rev + 1]->next = NULL;
+                       rx_pkts[rev + 2]->next = NULL;
+                       rx_pkts[rev + 3]->next = NULL;
                }

                /* C.3 calc available number of desc */
@@ -389,24 +386,35 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                staterr = _mm_packs_epi32(staterr, zero);

                /* D.3 copy final 1,2 data to rx_pkts */
-               _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
+               _mm_storeu_si128((void *)&rx_pkts[rev+1]->rx_descriptor_fields1,
                                pkt_mb2);
-               _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+               _mm_storeu_si128((void *)&rx_pkts[rev]->rx_descriptor_fields1,
                                pkt_mb1);

                /* C.4 calc avaialbe number of desc */
                var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-               nb_pkts_recd += var;
-               if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
+               if (unlikely(var == 0))
                        break;
+               else {
+                       if (split_packet)
+                                split_packet += var;
+
+                       rev += var;
+                       sw_ring += var;
+                       rxdp += var;
+                       if (rxdp == rxdp_end) {
+                               sw_ring = rxq->sw_ring;
+                               rxdp = rxq->rx_ring;
+                       } else if (var < RTE_IXGBE_DESCS_PER_LOOP)
+                               break;
+               }
        }

        /* Update our internal tail pointer */
-       rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
-       rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
-       rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+       rxq->rx_tail = rxdp - rxq->rx_ring;
+       rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + rev);

-       return nb_pkts_recd;
+       return rev;
 }

 /*
-- 
1.8.3.1

Reply via email to