Author: adrian
Date: Thu May 21 04:35:12 2020
New Revision: 361321
URL: https://svnweb.freebsd.org/changeset/base/361321

Log:
  [ath] Hopefully recover better-er upon RX restart on AR9380.
  
  This is all very long-standing bug stuff that is touchy and still poorly
  documented. Ok, here goes.
  
  The basic bug:
  
  * deleting a VAP causes the RX path (and TX path too) to be restarted
    without a full chip reset, which causes RX hangs on the AR9380 and later.
    (ie, the ones with the newer DMA engine.)
  
  The basic fix:
  
  * do an RX flush when stopping RX in ath_vap_delete() to match what happens
    when RX is stopped elsewhere.  This ensures any pending frames are completed
    and we restart at the right spot; it also ensures we don't push new RX
    buffers into the hardware if we're stopping receive.
  
  The other issues I found:
  
  * Don't bother checking the RX packet ring in the deferred read taskqueue;
    that's specifically supposed to be for completing frames rather than
    just yanking them off the receive ring.
  
  * Cancel/drain any pending deferred read taskqueue.  This isn't done inside
    any locks so we should be super careful here.  This stops the hardware
    being reprogrammed at the same time in another thread/CPU whilst we're
    stopping RX.
  
  * .. (yes, this should be better serialised, but that's for another day.
    maybe.)
  
  * Add more debugging to trace what's going on here.
  
  And the fun bit:
  
  * Reinitialise the RX FIFO if we've been reset OR stopped, rather than only
    when we've been reset.  I noticed that after all the above was done I was
    STILL seeing RXEOL.  RXEOL isn't enabled on the AR9380 so I'd only see it
    if I was sending TX frames (ie a ping where it'd be transmitted but never
    received) so I was not being spammed by RXEOL.  So, as long as stuff is
    stopped, restart it.
  
  This seems to be doing the right thing in both AP and STA modes.
  
  What I should do next, if I ever get time:
  
  * as I said above, serialise the receive stop/start to include taskqueues
  * monitor RXEOL on the AR9380 and, if I keep seeing it spammed / lockups,
    just go do a full chip reset to get things back on track. It sucks, but it
    is better than nothing.
  
  Tested:
  
  * AR9380 AP/STA mode, adding/deleting a hostap VAP to trigger the TX/RX
    queue stop/start; whilst also running an iperf through it.  Lots of times.
    Lots.  Of.. Times.

Modified:
  head/sys/dev/ath/if_ath.c
  head/sys/dev/ath/if_ath_rx_edma.c

Modified: head/sys/dev/ath/if_ath.c
==============================================================================
--- head/sys/dev/ath/if_ath.c   Thu May 21 04:26:20 2020        (r361320)
+++ head/sys/dev/ath/if_ath.c   Thu May 21 04:35:12 2020        (r361321)
@@ -1801,6 +1801,7 @@ ath_vap_delete(struct ieee80211vap *vap)
                ath_hal_intrset(ah, 0);         /* disable interrupts */
                /* XXX Do all frames from all vaps/nodes need draining here? */
                ath_stoprecv(sc, 1);            /* stop recv side */
+               ath_rx_flush(sc);
                ath_draintxq(sc, ATH_RESET_DEFAULT);            /* stop hw xmit 
side */
        }
 

Modified: head/sys/dev/ath/if_ath_rx_edma.c
==============================================================================
--- head/sys/dev/ath/if_ath_rx_edma.c   Thu May 21 04:26:20 2020        (r361320)
+++ head/sys/dev/ath/if_ath_rx_edma.c   Thu May 21 04:35:12 2020        (r361321)
@@ -162,6 +162,9 @@ ath_edma_stoprecv(struct ath_softc *sc, int dodelay)
 {
        struct ath_hal *ah = sc->sc_ah;
 
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called, dodelay=%d\n",
+           __func__, dodelay);
+
        ATH_RX_LOCK(sc);
 
        ath_hal_stoppcurecv(ah);
@@ -191,6 +194,8 @@ ath_edma_stoprecv(struct ath_softc *sc, int dodelay)
                sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending = NULL;
        }
        ATH_RX_UNLOCK(sc);
+
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: done\n", __func__);
 }
 
 /*
@@ -205,6 +210,8 @@ ath_edma_reinit_fifo(struct ath_softc *sc, HAL_RX_QUEU
        struct ath_buf *bf;
        int i, j;
 
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called\n", __func__);
+
        ATH_RX_LOCK_ASSERT(sc);
 
        i = re->m_fifo_head;
@@ -227,6 +234,7 @@ ath_edma_reinit_fifo(struct ath_softc *sc, HAL_RX_QUEU
                    i,
                    re->m_fifo_tail);
        }
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: done\n", __func__);
 }
 
 /*
@@ -237,6 +245,10 @@ ath_edma_startrecv(struct ath_softc *sc)
 {
        struct ath_hal *ah = sc->sc_ah;
 
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX,
+           "%s: called; resetted=%d, stopped=%d\n", __func__,
+           sc->sc_rx_resetted, sc->sc_rx_stopped);
+
        ATH_RX_LOCK(sc);
 
        /*
@@ -252,7 +264,7 @@ ath_edma_startrecv(struct ath_softc *sc)
        /*
         * In theory the hardware has been initialised, right?
         */
-       if (sc->sc_rx_resetted == 1) {
+       if (sc->sc_rx_resetted == 1 || sc->sc_rx_stopped == 1) {
                DPRINTF(sc, ATH_DEBUG_EDMA_RX,
                    "%s: Re-initing HP FIFO\n", __func__);
                ath_edma_reinit_fifo(sc, HAL_RX_QUEUE_HP);
@@ -262,8 +274,11 @@ ath_edma_startrecv(struct ath_softc *sc)
                sc->sc_rx_resetted = 0;
        } else {
                device_printf(sc->sc_dev,
-                   "%s: called without resetting chip?\n",
-                   __func__);
+                   "%s: called without resetting chip? "
+                   "resetted=%d, stopped=%d\n",
+                   __func__,
+                   sc->sc_rx_resetted,
+                   sc->sc_rx_stopped);
        }
 
        /* Add up to m_fifolen entries in each queue */
@@ -290,6 +305,7 @@ ath_edma_startrecv(struct ath_softc *sc)
        sc->sc_rx_stopped = 0;
 
        ATH_RX_UNLOCK(sc);
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: ready\n", __func__);
 
        return (0);
 }
@@ -298,6 +314,8 @@ static void
 ath_edma_recv_sched_queue(struct ath_softc *sc, HAL_RX_QUEUE qtype,
     int dosched)
 {
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called; qtype=%d, dosched=%d\n",
+           __func__, qtype, dosched);
 
        ATH_LOCK(sc);
        ath_power_set_power_state(sc, HAL_PM_AWAKE);
@@ -309,13 +327,19 @@ ath_edma_recv_sched_queue(struct ath_softc *sc, HAL_RX
        ath_power_restore_power_state(sc);
        ATH_UNLOCK(sc);
 
+       /* XXX TODO: methodize */
        taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask);
+
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: done\n", __func__);
 }
 
 static void
 ath_edma_recv_sched(struct ath_softc *sc, int dosched)
 {
 
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called; dosched=%d\n",
+           __func__, dosched);
+
        ATH_LOCK(sc);
        ath_power_set_power_state(sc, HAL_PM_AWAKE);
        ATH_UNLOCK(sc);
@@ -327,19 +351,27 @@ ath_edma_recv_sched(struct ath_softc *sc, int dosched)
        ath_power_restore_power_state(sc);
        ATH_UNLOCK(sc);
 
+       /* XXX TODO: methodize */
        taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask);
+
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: done\n", __func__);
 }
 
 static void
 ath_edma_recv_flush(struct ath_softc *sc)
 {
 
-       DPRINTF(sc, ATH_DEBUG_RECV, "%s: called\n", __func__);
+       DPRINTF(sc, ATH_DEBUG_RECV | ATH_DEBUG_EDMA_RX, "%s: called\n", 
__func__);
 
        ATH_PCU_LOCK(sc);
        sc->sc_rxproc_cnt++;
        ATH_PCU_UNLOCK(sc);
 
+       // XXX TODO: methodize; make it an RX stop/block
+       while (taskqueue_cancel(sc->sc_tq, &sc->sc_rxtask, NULL) != 0) {
+               taskqueue_drain(sc->sc_tq, &sc->sc_rxtask);
+       }
+
        ATH_LOCK(sc);
        ath_power_set_power_state(sc, HAL_PM_AWAKE);
        ATH_UNLOCK(sc);
@@ -368,6 +400,8 @@ ath_edma_recv_flush(struct ath_softc *sc)
        ATH_PCU_LOCK(sc);
        sc->sc_rxproc_cnt--;
        ATH_PCU_UNLOCK(sc);
+
+       DPRINTF(sc, ATH_DEBUG_RECV | ATH_DEBUG_EDMA_RX, "%s: done\n", __func__);
 }
 
 /*
@@ -391,6 +425,8 @@ ath_edma_recv_proc_queue(struct ath_softc *sc, HAL_RX_
        nf = ath_hal_getchannoise(ah, sc->sc_curchan);
        sc->sc_stats.ast_rx_noise = nf;
 
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called; qtype=%d, dosched=%d\n", 
__func__, qtype, dosched);
+
        ATH_RX_LOCK(sc);
 
 #if 1
@@ -608,9 +644,6 @@ ath_edma_recv_tasklet(void *arg, int npending)
        ath_power_set_power_state(sc, HAL_PM_AWAKE);
        ATH_UNLOCK(sc);
 
-       ath_edma_recv_proc_queue(sc, HAL_RX_QUEUE_HP, 1);
-       ath_edma_recv_proc_queue(sc, HAL_RX_QUEUE_LP, 1);
-
        ath_edma_recv_proc_deferred_queue(sc, HAL_RX_QUEUE_HP, 1);
        ath_edma_recv_proc_deferred_queue(sc, HAL_RX_QUEUE_LP, 1);
 
@@ -632,6 +665,8 @@ ath_edma_recv_tasklet(void *arg, int npending)
        ATH_PCU_LOCK(sc);
        sc->sc_rxproc_cnt--;
        ATH_PCU_UNLOCK(sc);
+
+       DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: called; done!\n", __func__);
 }
 
 /*
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to