On 08/20/15 18:37, Vlad Zolotarov wrote:
> According to the 82599 and x540 HW specifications, the RS bit *must* be
> set in the last descriptor of *every* packet.
>
> Before this patch there were 3 types of Tx callbacks, each setting the
> RS bit only once every tx_rs_thresh descriptors. This patch introduces a
> set of new callbacks, one for each of those types, that set the RS bit
> in every EOP descriptor.
>
> ixgbe_set_tx_function() will set the appropriate Tx callback according
> to the device family.

[+Jesse and Jeff]

I've started to look at the i40e PMD and it has the same RS bit deferring
logic as the ixgbe PMD (surprise, surprise!.. ;)). To recall, the i40e PMD
uses a descriptor write-back completion mode.
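
To illustrate the two completion modes for anyone following along, here is a
toy sketch only -- the types and names below are made up for the example and
are not the actual i40e/ixgbe definitions. In descriptor write-back mode the
HW reports completion by setting the DD bit inside the descriptor that carried
RS; in HEAD write-back mode it instead periodically DMAs the ring head index
to a host memory location:

#include <stdbool.h>
#include <stdint.h>

/* Toy Tx descriptor: only the DD (descriptor done) status bit matters here. */
struct toy_txd {
	volatile uint32_t dd;
};

/* Descriptor write-back: poll the DD bit of the descriptor that had RS set. */
bool toy_done_desc_wb(const struct toy_txd *ring, uint16_t idx)
{
	return ring[idx].dd != 0;
}

/* HEAD write-back: HW writes the ring head index to host memory; a
 * descriptor is done once the reported head has passed it (wrap-around
 * is ignored to keep the sketch short). */
bool toy_done_head_wb(const volatile uint32_t *head_wb, uint16_t idx)
{
	return *head_wb > (uint32_t)idx;
}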

From the HW spec it's unclear whether the RS bit should be set on *every*
descriptor with the EOP bit. However, I noticed that the Linux driver,
before it moved to HEAD write-back mode, was setting the RS bit on every
EOP descriptor.
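
Just to spell out the policy difference being discussed, something like the
following (an illustrative sketch only, not the PMD code -- the helper and
its parameters are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Should the RS bit go into this packet's last (EOP) descriptor? */
bool toy_need_rs(bool always_rs, uint16_t *nb_tx_used,
		 uint16_t nb_used, uint16_t tx_rs_thresh)
{
	if (always_rs)		/* 82599/x540: RS on every EOP descriptor */
		return true;

	/* Deferring: RS only once every tx_rs_thresh used descriptors. */
	*nb_tx_used = (uint16_t)(*nb_tx_used + nb_used);
	if (*nb_tx_used >= tx_rs_thresh) {
		*nb_tx_used = 0;
		return true;
	}
	return false;
}

int main(void)
{
	uint16_t used = 0;
	int pkt;

	/* 8 single-descriptor packets, tx_rs_thresh == 4:
	 * prints RS=1 only for packets 3 and 7. */
	for (pkt = 0; pkt < 8; pkt++)
		printf("pkt %d: RS=%d\n", pkt,
		       toy_need_rs(false, &used, 1, 4));
	return 0;
}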

So, here is a question to the Intel guys: could you please clarify this
point?

Thanks in advance,
vlad

>
> This patch fixes the Tx hang we were constantly hitting with a
> seastar-based application on x540 NIC.
>
> Signed-off-by: Vlad Zolotarov <vladz at cloudius-systems.com>
> ---
> New in v4:
>     - Styling (white spaces) fixes.
>
> New in v3:
>     - Enforce the RS bit setting instead of enforcing tx_rs_thresh to be 1.
> ---
>   drivers/net/ixgbe/ixgbe_ethdev.c   |  14 +++-
>   drivers/net/ixgbe/ixgbe_ethdev.h   |   4 ++
>   drivers/net/ixgbe/ixgbe_rxtx.c     | 139 ++++++++++++++++++++++++++++---------
>   drivers/net/ixgbe/ixgbe_rxtx.h     |   2 +
>   drivers/net/ixgbe/ixgbe_rxtx_vec.c |  29 ++++++--
>   5 files changed, 149 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index b8ee1e9..355882c 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -866,12 +866,17 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
>       uint32_t ctrl_ext;
>       uint16_t csum;
>       int diag, i;
> +     bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);
>   
>       PMD_INIT_FUNC_TRACE();
>   
>       eth_dev->dev_ops = &ixgbe_eth_dev_ops;
>       eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> -     eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +
> +     if (rs_deferring_allowed)
> +             eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +     else
> +             eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;
>   
>       /*
>        * For secondary processes, we don't initialise any further as primary
> @@ -1147,12 +1152,17 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
>       struct ixgbe_hwstrip *hwstrip =
>               IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(eth_dev->data->dev_private);
>       struct ether_addr *perm_addr = (struct ether_addr *) hw->mac.perm_addr;
> +     bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);
>   
>       PMD_INIT_FUNC_TRACE();
>   
>       eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
>       eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
> -     eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +
> +     if (rs_deferring_allowed)
> +             eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +     else
> +             eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;
>   
>       /* for secondary processes, we don't initialise any further as primary
>        * has already done this work. Only check we don't need a different
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
> index c3d4f4f..390356d 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> @@ -367,9 +367,13 @@ uint16_t ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,
>   
>   uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>               uint16_t nb_pkts);
> +uint16_t ixgbe_xmit_pkts_always_rs(
> +     void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
>   
>   uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
>               uint16_t nb_pkts);
> +uint16_t ixgbe_xmit_pkts_simple_always_rs(
> +     void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
>   
>   int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
>                             struct rte_eth_rss_conf *rss_conf);
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 91023b9..044f72c 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -164,11 +164,16 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
>   
>   /* Populate 4 descriptors with data from 4 mbufs */
>   static inline void
> -tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
> +tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts,
> +    bool always_rs)
>   {
>       uint64_t buf_dma_addr;
>       uint32_t pkt_len;
>       int i;
> +     uint32_t flags = DCMD_DTYP_FLAGS;
> +
> +     if (always_rs)
> +             flags |= IXGBE_ADVTXD_DCMD_RS;
>   
>       for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
>               buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
> @@ -178,7 +183,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
>               txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
>   
>               txdp->read.cmd_type_len =
> -                     rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
> +                     rte_cpu_to_le_32(flags | pkt_len);
>   
>               txdp->read.olinfo_status =
>                       rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
> @@ -189,10 +194,15 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
>   
>   /* Populate 1 descriptor with data from 1 mbuf */
>   static inline void
> -tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
> +tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts,
> +    bool always_rs)
>   {
>       uint64_t buf_dma_addr;
>       uint32_t pkt_len;
> +     uint32_t flags = DCMD_DTYP_FLAGS;
> +
> +     if (always_rs)
> +             flags |= IXGBE_ADVTXD_DCMD_RS;
>   
>       buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
>       pkt_len = (*pkts)->data_len;
> @@ -200,7 +210,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
>       /* write data to descriptor */
>       txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
>       txdp->read.cmd_type_len =
> -                     rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
> +                     rte_cpu_to_le_32(flags | pkt_len);
>       txdp->read.olinfo_status =
>                       rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
>       rte_prefetch0(&(*pkts)->pool);
> @@ -212,7 +222,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
>    */
>   static inline void
>   ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
> -                   uint16_t nb_pkts)
> +                   uint16_t nb_pkts, bool always_rs)
>   {
>       volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
>       struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
> @@ -232,20 +242,21 @@ ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
>               for (j = 0; j < N_PER_LOOP; ++j) {
>                       (txep + i + j)->mbuf = *(pkts + i + j);
>               }
> -             tx4(txdp + i, pkts + i);
> +             tx4(txdp + i, pkts + i, always_rs);
>       }
>   
>       if (unlikely(leftover > 0)) {
>               for (i = 0; i < leftover; ++i) {
>                       (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
> -                     tx1(txdp + mainpart + i, pkts + mainpart + i);
> +                     tx1(txdp + mainpart + i, pkts + mainpart + i,
> +                         always_rs);
>               }
>       }
>   }
>   
>   static inline uint16_t
>   tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> -          uint16_t nb_pkts)
> +          uint16_t nb_pkts, bool always_rs)
>   {
>       struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
>       volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
> @@ -281,22 +292,25 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>        */
>       if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
>               n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
> -             ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
> +             ixgbe_tx_fill_hw_ring(txq, tx_pkts, n, always_rs);
>   
> -             /*
> -              * We know that the last descriptor in the ring will need to
> -              * have its RS bit set because tx_rs_thresh has to be
> -              * a divisor of the ring size
> -              */
> -             tx_r[txq->tx_next_rs].read.cmd_type_len |=
> -                     rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
> -             txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
> +             if (!always_rs) {
> +                     /*
> +                      * We know that the last descriptor in the ring will
> +                      * need to have its RS bit set because tx_rs_thresh has
> +                      * to be a divisor of the ring size
> +                      */
> +                     tx_r[txq->tx_next_rs].read.cmd_type_len |=
> +                             rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
> +                     txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
> +             }
>   
>               txq->tx_tail = 0;
>       }
>   
>       /* Fill H/W descriptor ring with mbuf data */
> -     ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
> +     ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n),
> +                           always_rs);
>       txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
>   
>       /*
> @@ -306,7 +320,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>        * but instead of subtracting 1 and doing >=, we can just do
>        * greater than without subtracting.
>        */
> -     if (txq->tx_tail > txq->tx_next_rs) {
> +     if (!always_rs && txq->tx_tail > txq->tx_next_rs) {
>               tx_r[txq->tx_next_rs].read.cmd_type_len |=
>                       rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
>               txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
> @@ -329,22 +343,23 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>       return nb_pkts;
>   }
>   
> -uint16_t
> -ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
> -                    uint16_t nb_pkts)
> +/* if always_rs is TRUE set RS bit on every descriptor with EOP bit */
> +static inline uint16_t
> +_ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                     uint16_t nb_pkts, bool always_rs)
>   {
>       uint16_t nb_tx;
>   
>       /* Try to transmit at least chunks of TX_MAX_BURST pkts */
>       if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
> -             return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
> +             return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts, always_rs);
>   
>       /* transmit more than the max burst, in chunks of TX_MAX_BURST */
>       nb_tx = 0;
>       while (nb_pkts) {
>               uint16_t ret, n;
>               n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
> -             ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
> +             ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n, always_rs);
>               nb_tx = (uint16_t)(nb_tx + ret);
>               nb_pkts = (uint16_t)(nb_pkts - ret);
>               if (ret < n)
> @@ -354,6 +369,20 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
>       return nb_tx;
>   }
>   
> +uint16_t
> +ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                    uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts_simple(tx_queue, tx_pkts, nb_pkts, false);
> +}
> +
> +uint16_t
> +ixgbe_xmit_pkts_simple_always_rs(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                              uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts_simple(tx_queue, tx_pkts, nb_pkts, true);
> +}
> +
>   static inline void
>   ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>               volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
> @@ -565,9 +594,10 @@ ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
>       return (0);
>   }
>   
> -uint16_t
> -ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> -             uint16_t nb_pkts)
> +/* if always_rs is TRUE set RS bit on every descriptor with EOP bit */
> +static inline uint16_t
> +_ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> +              uint16_t nb_pkts, bool always_rs)
>   {
>       struct ixgbe_tx_queue *txq;
>       struct ixgbe_tx_entry *sw_ring;
> @@ -827,11 +857,20 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>                * The last packet data descriptor needs End Of Packet (EOP)
>                */
>               cmd_type_len |= IXGBE_TXD_CMD_EOP;
> -             txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
> +
> +             if (always_rs) {
> +                     PMD_TX_FREE_LOG(DEBUG,
> +                                     "Setting RS bit on TXD id=%4u (port=%d queue=%d)",
> +                                     tx_last, txq->port_id, txq->queue_id);
> +                     cmd_type_len |= IXGBE_TXD_CMD_RS;
> +             } else {
> +                     txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
> +             }
> +
>               txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
>   
>               /* Set RS bit only on threshold packets' last descriptor */
> -             if (txq->nb_tx_used >= txq->tx_rs_thresh) {
> +             if (!always_rs && txq->nb_tx_used >= txq->tx_rs_thresh) {
>                       PMD_TX_FREE_LOG(DEBUG,
>                                       "Setting RS bit on TXD id="
>                                       "%4u (port=%d queue=%d)",
> @@ -859,6 +898,20 @@ end_of_tx:
>       return (nb_tx);
>   }
>   
> +uint16_t
> +ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> +             uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts(tx_queue, tx_pkts, nb_pkts, false);
> +}
> +
> +uint16_t
> +ixgbe_xmit_pkts_always_rs(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                       uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts(tx_queue, tx_pkts, nb_pkts, true);
> +}
> +
>   /*********************************************************************
>    *
>    *  RX functions
> @@ -2047,6 +2100,22 @@ static const struct ixgbe_txq_ops def_txq_ops = {
>   void __attribute__((cold))
>   ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>   {
> +     struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +     /*
> +      * According to 82599 and x540 specifications RS bit *must* be set on
> +      * the last descriptor of *every* packet. Therefore we will not allow
> +      * the tx_rs_thresh above 1 for all NICs newer than 82598. Since VFs are
> +      * available only on devices starting from 82599, tx_rs_thresh should be
> +      * set to 1 for ALL VF devices.
> +      */
> +     bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);
> +
> +     if (rs_deferring_allowed)
> +             PMD_INIT_LOG(DEBUG, "Using an RS bit setting deferring callback");
> +     else
> +             PMD_INIT_LOG(DEBUG, "Using an always setting RS bit callback");
> +
>       /* Use a simple Tx queue (no offloads, no multi segs) if possible */
>       if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
>                       && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
> @@ -2056,10 +2125,14 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>                               (rte_eal_process_type() != RTE_PROC_PRIMARY ||
>                                       ixgbe_txq_vec_setup(txq) == 0)) {
>                       PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
> -                     dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
> +                     dev->tx_pkt_burst = (rs_deferring_allowed ?
> +                                          ixgbe_xmit_pkts_vec :
> +                                          ixgbe_xmit_pkts_vec_always_rs);
>               } else
>   #endif
> -             dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
> +             dev->tx_pkt_burst = (rs_deferring_allowed ?
> +                                  ixgbe_xmit_pkts_simple :
> +                                  ixgbe_xmit_pkts_simple_always_rs);
>       } else {
>               PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
>               PMD_INIT_LOG(DEBUG,
> @@ -2070,7 +2143,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>                               " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
>                               (unsigned long)txq->tx_rs_thresh,
>                               (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
> -             dev->tx_pkt_burst = ixgbe_xmit_pkts;
> +             dev->tx_pkt_burst = (rs_deferring_allowed ?
> +                                  ixgbe_xmit_pkts :
> +                                  ixgbe_xmit_pkts_always_rs);
>       }
>   }
>   
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 113682a..c7ed71b 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -285,6 +285,8 @@ void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
>   
>   uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>               uint16_t nb_pkts);
> +uint16_t ixgbe_xmit_pkts_vec_always_rs(
> +     void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
>   int ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq);
>   
>   #endif /* RTE_IXGBE_INC_VECTOR */
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index cf25a53..a680fc2 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> @@ -723,9 +723,10 @@ tx_backlog_entry(struct ixgbe_tx_entry_v *txep,
>               txep[i].mbuf = tx_pkts[i];
>   }
>   
> -uint16_t
> -ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> -                    uint16_t nb_pkts)
> +/* if always_rs is TRUE set RS bit on every descriptor with EOP bit */
> +static inline uint16_t
> +_ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                  uint16_t nb_pkts, bool always_rs)
>   {
>       struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
>       volatile union ixgbe_adv_tx_desc *txdp;
> @@ -735,6 +736,9 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>       uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>       int i;
>   
> +     if (always_rs)
> +             flags = rs;
> +
>       if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
>               nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
>   
> @@ -764,7 +768,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>               nb_commit = (uint16_t)(nb_commit - n);
>   
>               tx_id = 0;
> -             txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
> +             if (!always_rs)
> +                     txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
>   
>               /* avoid reach the end of ring */
>               txdp = &(txq->tx_ring[tx_id]);
> @@ -776,7 +781,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>       vtx(txdp, tx_pkts, nb_commit, flags);
>   
>       tx_id = (uint16_t)(tx_id + nb_commit);
> -     if (tx_id > txq->tx_next_rs) {
> +     if (!always_rs && tx_id > txq->tx_next_rs) {
>               txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
>                       rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
>               txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
> @@ -790,6 +795,20 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>       return nb_pkts;
>   }
>   
> +uint16_t
> +ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                 uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts_vec(tx_queue, tx_pkts, nb_pkts, false);
> +}
> +
> +uint16_t
> +ixgbe_xmit_pkts_vec_always_rs(void *tx_queue, struct rte_mbuf **tx_pkts,
> +                           uint16_t nb_pkts)
> +{
> +     return _ixgbe_xmit_pkts_vec(tx_queue, tx_pkts, nb_pkts, true);
> +}
> +
>   static void __attribute__((cold))
>   ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
>   {
