This follows David Miller advice and tries to fix coalesce timer in
multi-queue scenarios.

We are now using per-queue coalesce values and per-queue TX timer.

Coalesce timer default values was changed to 1ms and the coalesce frames
to 25.

Tested in B2B setup between XGMAC2 and GMAC5.

Signed-off-by: Jose Abreu <joab...@synopsys.com>
Cc: Jerome Brunet <jbru...@baylibre.com>
Cc: Martin Blumenstingl <martin.blumensti...@googlemail.com>
Cc: David S. Miller <da...@davemloft.net>
Cc: Joao Pinto <jpi...@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavall...@st.com>
Cc: Alexandre Torgue <alexandre.tor...@st.com>
---
Jerome,

Can you please test if this one is okay ?

Thanks and Best Regards,
Jose Miguel Abreu
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   6 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 207 ++++++++++++++--------
 3 files changed, 135 insertions(+), 82 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h 
b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1854f270ad66..b1b305f8f414 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -258,10 +258,10 @@ struct stmmac_safety_stats {
 #define MAX_DMA_RIWT           0xff
 #define MIN_DMA_RIWT           0x20
 /* Tx coalesce parameters */
-#define STMMAC_COAL_TX_TIMER   40000
+#define STMMAC_COAL_TX_TIMER   1000
 #define STMMAC_MAX_COAL_TX_TICK        100000
 #define STMMAC_TX_MAX_FRAMES   256
-#define STMMAC_TX_FRAMES       64
+#define STMMAC_TX_FRAMES       25
 
 /* Packets types */
 enum packets_types {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index c0a855b7ab3b..957030cfb833 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -48,6 +48,9 @@ struct stmmac_tx_info {
 
 /* Frequently used values are kept adjacent for cache effect */
 struct stmmac_tx_queue {
+       u32 tx_count_frames;
+       int tx_timer_active;
+       struct timer_list txtimer;
        u32 queue_index;
        struct stmmac_priv *priv_data;
        struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
@@ -59,6 +62,7 @@ struct stmmac_tx_queue {
        dma_addr_t dma_tx_phy;
        u32 tx_tail_addr;
        u32 mss;
+       struct napi_struct napi ____cacheline_aligned_in_smp;
 };
 
 struct stmmac_rx_queue {
@@ -109,14 +113,12 @@ struct stmmac_pps_cfg {
 
 struct stmmac_priv {
        /* Frequently used values are kept adjacent for cache effect */
-       u32 tx_count_frames;
        u32 tx_coal_frames;
        u32 tx_coal_timer;
 
        int tx_coalesce;
        int hwts_tx_en;
        bool tx_path_in_lpi_mode;
-       struct timer_list txtimer;
        bool tso;
 
        unsigned int dma_buf_sz;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9f458bb16f2a..9809c2b319fe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -148,6 +148,7 @@ static void stmmac_verify_args(void)
 static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
        u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
        u32 queue;
 
        for (queue = 0; queue < rx_queues_cnt; queue++) {
@@ -155,6 +156,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv 
*priv)
 
                napi_disable(&rx_q->napi);
        }
+
+       for (queue = 0; queue < tx_queues_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               napi_disable(&tx_q->napi);
+       }
 }
 
 /**
@@ -164,6 +171,7 @@ static void stmmac_disable_all_queues(struct stmmac_priv 
*priv)
 static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 {
        u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
        u32 queue;
 
        for (queue = 0; queue < rx_queues_cnt; queue++) {
@@ -171,6 +179,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv 
*priv)
 
                napi_enable(&rx_q->napi);
        }
+
+       for (queue = 0; queue < tx_queues_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               napi_enable(&tx_q->napi);
+       }
 }
 
 /**
@@ -1843,7 +1857,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv 
*priv)
  * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission 
completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
+static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
+                          bool *more)
 {
        struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        unsigned int bytes_compl = 0, pkts_compl = 0;
@@ -1851,10 +1866,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, 
u32 queue)
 
        netif_tx_lock(priv->dev);
 
+       if (more)
+               *more = false;
+
        priv->xstats.tx_clean++;
 
        entry = tx_q->dirty_tx;
-       while (entry != tx_q->cur_tx) {
+       while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
                struct sk_buff *skb = tx_q->tx_skbuff[entry];
                struct dma_desc *p;
                int status;
@@ -1937,7 +1955,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, 
u32 queue)
                stmmac_enable_eee_mode(priv);
                mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
        }
+
+       if (more && (tx_q->dirty_tx != tx_q->cur_tx))
+               *more = true;
+
        netif_tx_unlock(priv->dev);
+
+       return pkts_compl;
 }
 
 /**
@@ -2020,6 +2044,34 @@ static bool stmmac_safety_feat_interrupt(struct 
stmmac_priv *priv)
        return false;
 }
 
+static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
+{
+       int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+                                                &priv->xstats, chan);
+
+       if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+
+               if (likely(napi_schedule_prep(&rx_q->napi))) {
+                       stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+                       __napi_schedule(&rx_q->napi);
+               }
+       } else {
+               status &= ~handle_rx;
+       }
+
+       if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+               if (likely(napi_schedule_prep(&tx_q->napi)))
+                       __napi_schedule(&tx_q->napi);
+       } else {
+               status &= ~handle_tx;
+       }
+
+       return status;
+}
+
 /**
  * stmmac_dma_interrupt - DMA ISR
  * @priv: driver private structure
@@ -2034,57 +2086,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv 
*priv)
        u32 channels_to_check = tx_channel_count > rx_channel_count ?
                                tx_channel_count : rx_channel_count;
        u32 chan;
-       bool poll_scheduled = false;
        int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
 
        /* Make sure we never check beyond our status buffer. */
        if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
                channels_to_check = ARRAY_SIZE(status);
 
-       /* Each DMA channel can be used for rx and tx simultaneously, yet
-        * napi_struct is embedded in struct stmmac_rx_queue rather than in a
-        * stmmac_channel struct.
-        * Because of this, stmmac_poll currently checks (and possibly wakes)
-        * all tx queues rather than just a single tx queue.
-        */
        for (chan = 0; chan < channels_to_check; chan++)
-               status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
-                               &priv->xstats, chan);
-
-       for (chan = 0; chan < rx_channel_count; chan++) {
-               if (likely(status[chan] & handle_rx)) {
-                       struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
-
-                       if (likely(napi_schedule_prep(&rx_q->napi))) {
-                               stmmac_disable_dma_irq(priv, priv->ioaddr, 
chan);
-                               __napi_schedule(&rx_q->napi);
-                               poll_scheduled = true;
-                       }
-               }
-       }
-
-       /* If we scheduled poll, we already know that tx queues will be checked.
-        * If we didn't schedule poll, see if any DMA channel (used by tx) has a
-        * completed transmission, if so, call stmmac_poll (once).
-        */
-       if (!poll_scheduled) {
-               for (chan = 0; chan < tx_channel_count; chan++) {
-                       if (status[chan] & handle_tx) {
-                               /* It doesn't matter what rx queue we choose
-                                * here. We use 0 since it always exists.
-                                */
-                               struct stmmac_rx_queue *rx_q =
-                                       &priv->rx_queue[0];
-
-                               if (likely(napi_schedule_prep(&rx_q->napi))) {
-                                       stmmac_disable_dma_irq(priv,
-                                                       priv->ioaddr, chan);
-                                       __napi_schedule(&rx_q->napi);
-                               }
-                               break;
-                       }
-               }
-       }
+               status[chan] = stmmac_napi_check(priv, chan);
 
        for (chan = 0; chan < tx_channel_count; chan++) {
                if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
@@ -2241,13 +2250,11 @@ static int stmmac_init_dma_engine(struct stmmac_priv 
*priv)
  */
 static void stmmac_tx_timer(struct timer_list *t)
 {
-       struct stmmac_priv *priv = from_timer(priv, t, txtimer);
-       u32 tx_queues_count = priv->plat->tx_queues_to_use;
-       u32 queue;
+       struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
 
-       /* let's scan all the tx queues */
-       for (queue = 0; queue < tx_queues_count; queue++)
-               stmmac_tx_clean(priv, queue);
+       if (likely(napi_schedule_prep(&tx_q->napi)))
+               __napi_schedule(&tx_q->napi);
+       tx_q->tx_timer_active = 0;
 }
 
 /**
@@ -2260,11 +2267,17 @@ static void stmmac_tx_timer(struct timer_list *t)
  */
 static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 {
+       u32 tx_channel_count = priv->plat->tx_queues_to_use;
+       u32 chan;
+
        priv->tx_coal_frames = STMMAC_TX_FRAMES;
        priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-       timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
-       priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
-       add_timer(&priv->txtimer);
+
+       for (chan = 0; chan < tx_channel_count; chan++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+               timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
+       }
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2592,6 +2605,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
 static int stmmac_open(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 chan;
        int ret;
 
        stmmac_check_ether_addr(priv);
@@ -2688,7 +2702,9 @@ static int stmmac_open(struct net_device *dev)
        if (dev->phydev)
                phy_stop(dev->phydev);
 
-       del_timer_sync(&priv->txtimer);
+       for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+               del_timer_sync(&priv->tx_queue[chan].txtimer);
+
        stmmac_hw_teardown(dev);
 init_error:
        free_dma_desc_resources(priv);
@@ -2708,6 +2724,7 @@ static int stmmac_open(struct net_device *dev)
 static int stmmac_release(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 chan;
 
        if (priv->eee_enabled)
                del_timer_sync(&priv->eee_ctrl_timer);
@@ -2722,7 +2739,8 @@ static int stmmac_release(struct net_device *dev)
 
        stmmac_disable_all_queues(priv);
 
-       del_timer_sync(&priv->txtimer);
+       for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+               del_timer_sync(&priv->tx_queue[chan].txtimer);
 
        /* Free the IRQ lines */
        free_irq(dev->irq, dev);
@@ -2936,14 +2954,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
        priv->xstats.tx_tso_nfrags += nfrags;
 
        /* Manage tx mitigation */
-       priv->tx_count_frames += nfrags + 1;
-       if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-               mod_timer(&priv->txtimer,
-                         STMMAC_COAL_TIMER(priv->tx_coal_timer));
-       } else {
-               priv->tx_count_frames = 0;
+       tx_q->tx_count_frames += nfrags + 1;
+       if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
                stmmac_set_tx_ic(priv, desc);
                priv->xstats.tx_set_ic_bit++;
+               tx_q->tx_count_frames = 0;
        }
 
        skb_tx_timestamp(skb);
@@ -2994,6 +3009,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
        stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+       if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+               tx_q->tx_timer_active = 1;
+               mod_timer(&tx_q->txtimer,
+                               STMMAC_COAL_TIMER(priv->tx_coal_timer));
+       }
+
        return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3146,14 +3167,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
         * This approach takes care about the fragments: desc is the first
         * element in case of no SG.
         */
-       priv->tx_count_frames += nfrags + 1;
-       if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-               mod_timer(&priv->txtimer,
-                         STMMAC_COAL_TIMER(priv->tx_coal_timer));
-       } else {
-               priv->tx_count_frames = 0;
+       tx_q->tx_count_frames += nfrags + 1;
+       if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
                stmmac_set_tx_ic(priv, desc);
                priv->xstats.tx_set_ic_bit++;
+               tx_q->tx_count_frames = 0;
        }
 
        skb_tx_timestamp(skb);
@@ -3199,8 +3217,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
        stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
        stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+       if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+               tx_q->tx_timer_active = 1;
+               mod_timer(&tx_q->txtimer,
+                               STMMAC_COAL_TIMER(priv->tx_coal_timer));
+       }
+
        return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3514,27 +3539,41 @@ static int stmmac_rx(struct stmmac_priv *priv, int 
limit, u32 queue)
  *  Description :
  *  To look at the incoming frames and clear the tx resources.
  */
-static int stmmac_poll(struct napi_struct *napi, int budget)
+static int stmmac_rx_poll(struct napi_struct *napi, int budget)
 {
        struct stmmac_rx_queue *rx_q =
                container_of(napi, struct stmmac_rx_queue, napi);
        struct stmmac_priv *priv = rx_q->priv_data;
-       u32 tx_count = priv->plat->tx_queues_to_use;
        u32 chan = rx_q->queue_index;
        int work_done = 0;
-       u32 queue;
 
        priv->xstats.napi_poll++;
 
-       /* check all the queues */
-       for (queue = 0; queue < tx_count; queue++)
-               stmmac_tx_clean(priv, queue);
-
        work_done = stmmac_rx(priv, budget, rx_q->queue_index);
+       if (work_done < budget && napi_complete_done(napi, work_done))
+               stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+
+       return work_done;
+}
+
+static int stmmac_tx_poll(struct napi_struct *napi, int budget)
+{
+       struct stmmac_tx_queue *tx_q =
+               container_of(napi, struct stmmac_tx_queue, napi);
+       struct stmmac_priv *priv = tx_q->priv_data;
+       u32 chan = tx_q->queue_index;
+       int work_done = 0;
+       bool more;
+
+       priv->xstats.napi_poll++;
+
+       work_done = stmmac_tx_clean(priv, budget, chan, &more);
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
-               stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+               if (more)
+                       napi_reschedule(napi);
        }
+
        return work_done;
 }
 
@@ -4325,10 +4364,17 @@ int stmmac_dvr_probe(struct device *device,
        for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
                struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-               netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
+               netif_napi_add(ndev, &rx_q->napi, stmmac_rx_poll,
                               (8 * priv->plat->rx_queues_to_use));
        }
 
+       for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               netif_napi_add(ndev, &tx_q->napi, stmmac_tx_poll,
+                              (8 * priv->plat->tx_queues_to_use));
+       }
+
        mutex_init(&priv->lock);
 
        /* If a specific clk_csr value is passed from the platform
@@ -4377,6 +4423,11 @@ int stmmac_dvr_probe(struct device *device,
 
                netif_napi_del(&rx_q->napi);
        }
+       for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               netif_napi_del(&tx_q->napi);
+       }
 error_hw_init:
        destroy_workqueue(priv->wq);
 error_wq:
-- 
2.7.4


Reply via email to