Commit 8fce33317023 introduced the concept of NAPI per-channel and
independent cleaning of TX path.

This is currently breaking performance in some cases. The scenario
happens when all packets are being received in Queue 0 but the TX is
performed in Queue != 0.

Fix this by using different NAPI instances per each TX and RX queue, as
suggested by Florian.

Signed-off-by: Jose Abreu <joab...@synopsys.com>
Cc: Florian Fainelli <f.faine...@gmail.com>
Cc: Joao Pinto <jpi...@synopsys.com>
Cc: David S. Miller <da...@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavall...@st.com>
Cc: Alexandre Torgue <alexandre.tor...@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   5 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 112 ++++++++++++----------
 2 files changed, 64 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 63e1064b27a2..e697ecd9b0a6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -78,11 +78,10 @@ struct stmmac_rx_queue {
 };
 
 struct stmmac_channel {
-       struct napi_struct napi ____cacheline_aligned_in_smp;
+       struct napi_struct rx_napi ____cacheline_aligned_in_smp;
+       struct napi_struct tx_napi ____cacheline_aligned_in_smp;
        struct stmmac_priv *priv_data;
        u32 index;
-       int has_rx;
-       int has_tx;
 };
 
 struct stmmac_tc_entry {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 685d20472358..89a4dd75af76 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -155,7 +155,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv 
*priv)
        for (queue = 0; queue < maxq; queue++) {
                struct stmmac_channel *ch = &priv->channel[queue];
 
-               napi_disable(&ch->napi);
+               if (queue < rx_queues_cnt)
+                       napi_disable(&ch->rx_napi);
+               if (queue < tx_queues_cnt)
+                       napi_disable(&ch->tx_napi);
        }
 }
 
@@ -173,7 +176,10 @@ static void stmmac_enable_all_queues(struct stmmac_priv 
*priv)
        for (queue = 0; queue < maxq; queue++) {
                struct stmmac_channel *ch = &priv->channel[queue];
 
-               napi_enable(&ch->napi);
+               if (queue < rx_queues_cnt)
+                       napi_enable(&ch->rx_napi);
+               if (queue < tx_queues_cnt)
+                       napi_enable(&ch->tx_napi);
        }
 }
 
@@ -1939,6 +1945,10 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int 
budget, u32 queue)
                mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
        }
 
+       /* We still have pending packets, let's call for a new scheduling */
+       if (tx_q->dirty_tx != tx_q->cur_tx)
+               mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
+
        __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
 
        return count;
@@ -2029,23 +2039,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, 
u32 chan)
        int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
                                                 &priv->xstats, chan);
        struct stmmac_channel *ch = &priv->channel[chan];
-       bool needs_work = false;
-
-       if ((status & handle_rx) && ch->has_rx) {
-               needs_work = true;
-       } else {
-               status &= ~handle_rx;
-       }
 
-       if ((status & handle_tx) && ch->has_tx) {
-               needs_work = true;
-       } else {
-               status &= ~handle_tx;
+       if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
+               stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+               napi_schedule_irqoff(&ch->rx_napi);
        }
 
-       if (needs_work && napi_schedule_prep(&ch->napi)) {
+       if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
                stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
-               __napi_schedule(&ch->napi);
+               napi_schedule_irqoff(&ch->tx_napi);
        }
 
        return status;
@@ -2241,8 +2243,14 @@ static void stmmac_tx_timer(struct timer_list *t)
 
        ch = &priv->channel[tx_q->queue_index];
 
-       if (likely(napi_schedule_prep(&ch->napi)))
-               __napi_schedule(&ch->napi);
+       /*
+        * If NAPI is already running we can miss some events. Let's rearm
+        * the timer and try again.
+        */
+       if (likely(napi_schedule_prep(&ch->tx_napi)))
+               __napi_schedule(&ch->tx_napi);
+       else
+               mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
 }
 
 /**
@@ -3498,7 +3506,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, 
u32 queue)
                        else
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-                       napi_gro_receive(&ch->napi, skb);
+                       napi_gro_receive(&ch->rx_napi, skb);
 
                        priv->dev->stats.rx_packets++;
                        priv->dev->stats.rx_bytes += frame_len;
@@ -3513,41 +3521,41 @@ static int stmmac_rx(struct stmmac_priv *priv, int 
limit, u32 queue)
        return count;
 }
 
-/**
- *  stmmac_poll - stmmac poll method (NAPI)
- *  @napi : pointer to the napi structure.
- *  @budget : maximum number of packets that the current CPU can receive from
- *           all interfaces.
- *  Description :
- *  To look at the incoming frames and clear the tx resources.
- */
-static int stmmac_napi_poll(struct napi_struct *napi, int budget)
+static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 {
        struct stmmac_channel *ch =
-               container_of(napi, struct stmmac_channel, napi);
+               container_of(napi, struct stmmac_channel, rx_napi);
        struct stmmac_priv *priv = ch->priv_data;
-       int work_done, rx_done = 0, tx_done = 0;
        u32 chan = ch->index;
+       int work_done;
 
        priv->xstats.napi_poll++;
 
-       if (ch->has_tx)
-               tx_done = stmmac_tx_clean(priv, budget, chan);
-       if (ch->has_rx)
-               rx_done = stmmac_rx(priv, budget, chan);
+       work_done = stmmac_rx(priv, budget, chan);
+       if (work_done < budget && napi_complete_done(napi, work_done))
+               stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+       return work_done;
+}
 
-       work_done = max(rx_done, tx_done);
-       work_done = min(work_done, budget);
+static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
+{
+       struct stmmac_channel *ch =
+               container_of(napi, struct stmmac_channel, tx_napi);
+       struct stmmac_priv *priv = ch->priv_data;
+       struct stmmac_tx_queue *tx_q;
+       u32 chan = ch->index;
+       int work_done;
 
-       if (work_done < budget && napi_complete_done(napi, work_done)) {
-               int stat;
+       priv->xstats.napi_poll++;
 
+       work_done = stmmac_tx_clean(priv, budget, chan);
+       if (work_done < budget && napi_complete_done(napi, work_done))
                stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
-               stat = stmmac_dma_interrupt_status(priv, priv->ioaddr,
-                                                  &priv->xstats, chan);
-               if (stat && napi_reschedule(napi))
-                       stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
-       }
+
+       /* Force transmission restart */
+       tx_q = &priv->tx_queue[chan];
+       stmmac_enable_dma_transmission(priv, priv->ioaddr);
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, chan);
 
        return work_done;
 }
@@ -4323,13 +4331,14 @@ int stmmac_dvr_probe(struct device *device,
                ch->priv_data = priv;
                ch->index = queue;
 
-               if (queue < priv->plat->rx_queues_to_use)
-                       ch->has_rx = true;
-               if (queue < priv->plat->tx_queues_to_use)
-                       ch->has_tx = true;
-
-               netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
-                              NAPI_POLL_WEIGHT);
+               if (queue < priv->plat->rx_queues_to_use) {
+                       netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx,
+                                      NAPI_POLL_WEIGHT);
+               }
+               if (queue < priv->plat->tx_queues_to_use) {
+                       netif_napi_add(ndev, &ch->tx_napi, stmmac_napi_poll_tx,
+                                      NAPI_POLL_WEIGHT);
+               }
        }
 
        mutex_init(&priv->lock);
@@ -4385,7 +4394,10 @@ int stmmac_dvr_probe(struct device *device,
        for (queue = 0; queue < maxq; queue++) {
                struct stmmac_channel *ch = &priv->channel[queue];
 
-               netif_napi_del(&ch->napi);
+               if (queue < priv->plat->rx_queues_to_use)
+                       netif_napi_del(&ch->rx_napi);
+               if (queue < priv->plat->tx_queues_to_use)
+                       netif_napi_del(&ch->tx_napi);
        }
 error_hw_init:
        destroy_workqueue(priv->wq);
-- 
2.7.4

Reply via email to