Hi Felix,

On 08/01/16 23:34, Conn O'Griofa wrote:
I tried replacing netif_start_queue(dev) with ag71xx_hw_start(ag) in 
ag71xx_hw_enable. With this change, when the DMA stuck issue occurs, there's no 
longer any tx timeouts logged, but the interface stops responding. Perhaps it's 
also necessary to call ag71xx_fast_reset (which only gets called during a link 
adjust)?

It appears that my hunch was correct, and both are necessary. The following 
patch works 100% fine; when the dma stuck condition occurs, recovery now 
completes without any link adjust or any other output from the kernel log*. The 
tx timeout condition is also avoided thanks to checking for stuck dma, internet 
connectivity remains stable, and the sirq storm never triggers. It looks like 
your patch, with my additions, solves the timeout issue perfectly on my device.

One issue I'd like to bring to your attention: with my changes, 
ag71xx_fast_reset is now called for every chipset as part of the 
ag71xx_hw_enable function. Prior to this, it was only called during a link 
adjust on the ar724x chipsets, but now it's going to be called by the ar91xx 
series, too.

If ag71xx_fast_reset is not suitable for use on the ar91xx series, the patch is 
going to need changes (and isolating ag71xx_fast_reset to ar724x in 
ag71xx_hw_enable may not be sufficient for the new ag71xx_restart_work_func to 
operate properly on ar91xx chips). But if it is suitable for all chipsets, 
perhaps it would be a good idea to call ag71xx_fast_reset for all chipsets in 
ag71xx_link_adjust, too?

Conn

*When testing on my device, I added some debug to log the dma stuck condition, 
just to ensure the problem was actually being triggered during testing.

diff --git 
a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c 
b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
index 31b38d7..855b348 100644
--- 
a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
+++ 
b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
@@ -631,34 +631,63 @@ void ag71xx_link_adjust(struct ag71xx *ag)
                ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL));
 }
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+       int ret;
+
+       ret = ag71xx_rings_init(ag);
+       if (ret)
+               return ret;
+
+       napi_enable(&ag->napi);
+
+       ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+       ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+       ag71xx_fast_reset(ag);
+       ag71xx_hw_start(ag);
+
+       return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ag->lock, flags);
+
+       netif_stop_queue(ag->dev);
+
+       ag71xx_hw_stop(ag);
+       ag71xx_dma_reset(ag);
+
+       napi_disable(&ag->napi);
+       del_timer_sync(&ag->oom_timer);
+
+       spin_unlock_irqrestore(&ag->lock, flags);
+
+       ag71xx_rings_cleanup(ag);
+}
+
 static int ag71xx_open(struct net_device *dev)
 {
        struct ag71xx *ag = netdev_priv(dev);
        unsigned int max_frame_len;
        int ret;
+ netif_carrier_off(dev);
        max_frame_len = ag71xx_max_frame_len(dev->mtu);
        ag->rx_buf_size = max_frame_len + NET_SKB_PAD + NET_IP_ALIGN;
/* setup max frame length */
        ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+       ag71xx_hw_set_macaddr(ag, dev->dev_addr);
- ret = ag71xx_rings_init(ag);
+       ret = ag71xx_hw_enable(ag);
        if (ret)
                goto err;
- napi_enable(&ag->napi);
-
-       netif_carrier_off(dev);
        ag71xx_phy_start(ag);
- ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
-       ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
-
-       ag71xx_hw_set_macaddr(ag, dev->dev_addr);
-
-       netif_start_queue(dev);
-
        return 0;
err:
@@ -669,24 +698,10 @@ err:
 static int ag71xx_stop(struct net_device *dev)
 {
        struct ag71xx *ag = netdev_priv(dev);
-       unsigned long flags;
netif_carrier_off(dev);
        ag71xx_phy_stop(ag);
-
-       spin_lock_irqsave(&ag->lock, flags);
-
-       netif_stop_queue(dev);
-
-       ag71xx_hw_stop(ag);
-       ag71xx_dma_reset(ag);
-
-       napi_disable(&ag->napi);
-       del_timer_sync(&ag->oom_timer);
-
-       spin_unlock_irqrestore(&ag->lock, flags);
-
-       ag71xx_rings_cleanup(ag);
+       ag71xx_hw_disable(ag);
return 0;
 }
@@ -870,14 +885,10 @@ static void ag71xx_restart_work_func(struct work_struct 
*work)
 {
        struct ag71xx *ag = container_of(work, struct ag71xx, restart_work);
- if (ag71xx_get_pdata(ag)->is_ar724x) {
-               ag->link = 0;
-               ag71xx_link_adjust(ag);
-               return;
-       }
-
-       ag71xx_stop(ag->dev);
-       ag71xx_open(ag->dev);
+       rtnl_lock();
+       ag71xx_hw_disable(ag);
+       ag71xx_hw_enable(ag);
+       rtnl_unlock();
 }
static bool ag71xx_check_dma_stuck(struct ag71xx *ag, unsigned long timestamp)
@@ -919,7 +930,7 @@ static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
                struct sk_buff *skb = ring->buf[i].skb;
if (!flush && !ag71xx_desc_empty(desc)) {
-                       if (pdata->is_ar7240 &&
+                       if (pdata->is_ar724x &&
                            ag71xx_check_dma_stuck(ag, ring->buf[i].timestamp))
                                schedule_work(&ag->restart_work);
                        break;
_______________________________________________
openwrt-devel mailing list
openwrt-devel@lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel

Reply via email to