Author: adrian
Date: Sun Oct 18 00:59:28 2015
New Revision: 289476
URL: https://svnweb.freebsd.org/changeset/base/289476

Log:
  if_arge: fix up TX workaround; add TX/RX requirements for busdma; add stats
  
  The early ethernet MACs (I think AR71xx and AR913x) require 4-byte
  alignment for all packets on both TX and RX.
  
  The later MACs have started relaxing the requirements.
  
  For now, the 1-byte TX and 1-byte RX alignment requirements are only for
  the QCA955x SoCs.  I'll add in the relaxed requirements as I review the
  datasheets and do testing.
  
  * Add a hardware flags field and 1-byte / 4-byte TX/RX alignment.
  * .. defaulting to 4-byte TX and 4-byte RX alignment.
  * Only enforce the TX alignment fixup if the hardware requires a 4-byte
    TX alignment.  This avoids a call to m_defrag().
  * Add counters for various situations for further debugging.
  * Set the 1-byte and 4-byte busdma alignment requirement when
    the tag is created.
  
  This improves the straight bridging performance from 130mbit/sec
  to 180mbit/sec, purely by removing the need for TX path bounce buffers.
  
  The main performance issue is the RX alignment requirement and any RX
  bounce buffering that's occurring.  (In a local test, removing the RX
  fixup path and just aligning buffers raises the performance to above
  400mbit/sec.)
  
  In theory it's a no-op for SoCs before the QCA955x.
  
  Tested:
  
  * QCA9558 SoC in AP135 board, using software bridging between arge0/arge1.

Modified:
  head/sys/mips/atheros/if_arge.c
  head/sys/mips/atheros/if_argevar.h

Modified: head/sys/mips/atheros/if_arge.c
==============================================================================
--- head/sys/mips/atheros/if_arge.c     Sat Oct 17 22:41:30 2015        
(r289475)
+++ head/sys/mips/atheros/if_arge.c     Sun Oct 18 00:59:28 2015        
(r289476)
@@ -298,6 +298,29 @@ arge_attach_sysctl(device_t dev)
                "tx_pkts_unaligned", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned,
                0, "number of TX unaligned packets");
 
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "tx_pkts_unaligned_start", CTLFLAG_RW, 
&sc->stats.tx_pkts_unaligned_start,
+               0, "number of TX unaligned packets (start)");
+
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "tx_pkts_unaligned_len", CTLFLAG_RW, 
&sc->stats.tx_pkts_unaligned_len,
+               0, "number of TX unaligned packets (len)");
+
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs,
+               0, "number of TX packets fail with no ring slots avail");
+
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "intr_stray_filter", CTLFLAG_RW, &sc->stats.intr_stray,
+               0, "number of stray interrupts (filter)");
+
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "intr_stray_intr", CTLFLAG_RW, &sc->stats.intr_stray2,
+               0, "number of stray interrupts (intr)");
+
+       SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+               "intr_ok", CTLFLAG_RW, &sc->stats.intr_ok,
+               0, "number of OK interrupts");
 #ifdef ARGE_DEBUG
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod",
            CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, "");
@@ -627,6 +650,22 @@ arge_attach(device_t dev)
        }
 
        /*
+        * Hardware workarounds.
+        */
+       switch (ar71xx_soc) {
+       case AR71XX_SOC_QCA9556:
+       case AR71XX_SOC_QCA9558:
+               /* Arbitrary alignment */
+               sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE;
+               sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE;
+               break;
+       default:
+               sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE;
+               sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE;
+               break;
+       }
+
+       /*
         * Some units (eg the TP-Link WR-1043ND) do not have a convenient
         * EEPROM location to read the ethernet MAC address from.
         * OpenWRT simply snaffles it from a fixed location.
@@ -825,6 +864,9 @@ arge_attach(device_t dev)
        ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG0,
            FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT);
 
+       /*
+        * SoC specific bits.
+        */
        switch (ar71xx_soc) {
                case AR71XX_SOC_AR7240:
                case AR71XX_SOC_AR7241:
@@ -1351,24 +1393,35 @@ arge_init_locked(struct arge_softc *sc)
  * Return whether the mbuf chain is correctly aligned
  * for the arge TX engine.
  *
- * The TX engine requires each fragment to be aligned to a
- * 4 byte boundary and the size of each fragment except
- * the last to be a multiple of 4 bytes.
+ * All the MACs have a length requirement: any non-final
+ * fragment (ie, descriptor with MORE bit set) needs to have
+ * a length divisible by 4.
  *
- * XXX TODO: I believe this is only a bug on the AR71xx and
- * AR913x MACs. The later MACs (AR724x and later) does not
- * need this workaround.
+ * The AR71xx, AR913x require the start address also be
+ * DWORD aligned.  The later MACs don't.
  */
 static int
-arge_mbuf_chain_is_tx_aligned(struct mbuf *m0)
+arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0)
 {
        struct mbuf *m;
 
        for (m = m0; m != NULL; m = m->m_next) {
-               if((mtod(m, intptr_t) & 3) != 0)
+               /*
+                * Only do this for chips that require it.
+                */
+               if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+                   (mtod(m, intptr_t) & 3) != 0) {
+                       sc->stats.tx_pkts_unaligned_start++;
                        return 0;
-               if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0))
+               }
+
+               /*
+                * All chips have this requirement for length.
+                */
+               if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0)) {
+                       sc->stats.tx_pkts_unaligned_len++;
                        return 0;
+               }
        }
        return 1;
 }
@@ -1389,15 +1442,10 @@ arge_encap(struct arge_softc *sc, struct
        ARGE_LOCK_ASSERT(sc);
 
        /*
-        * Fix mbuf chain, all fragments should be 4 bytes aligned and
-        * even 4 bytes
-        *
-        * XXX TODO: I believe this is only a bug on the AR71xx and
-        * AR913x MACs. The later MACs (AR724x and later) does not
-        * need this workaround.
+        * Fix mbuf chain based on hardware alignment constraints.
         */
        m = *m_head;
-       if (! arge_mbuf_chain_is_tx_aligned(m)) {
+       if (! arge_mbuf_chain_is_tx_aligned(sc, m)) {
                sc->stats.tx_pkts_unaligned++;
                m = m_defrag(*m_head, M_NOWAIT);
                if (m == NULL) {
@@ -1427,6 +1475,7 @@ arge_encap(struct arge_softc *sc, struct
        /* Check number of available descriptors. */
        if (sc->arge_cdata.arge_tx_cnt + nsegs >= (ARGE_TX_RING_COUNT - 1)) {
                bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap);
+               sc->stats.tx_pkts_nosegs++;
                return (ENOBUFS);
        }
 
@@ -1444,7 +1493,9 @@ arge_encap(struct arge_softc *sc, struct
                desc = &sc->arge_rdata.arge_tx_ring[prod];
                desc->packet_ctrl = ARGE_DMASIZE(txsegs[i].ds_len);
 
-               if (txsegs[i].ds_addr & 3)
+               /* XXX Note: only relevant for older MACs; but check length! */
+               if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+                   (txsegs[i].ds_addr & 3))
                        panic("TX packet address unaligned\n");
 
                desc->packet_addr = txsegs[i].ds_addr;
@@ -1715,6 +1766,16 @@ arge_dma_alloc(struct arge_softc *sc)
        struct arge_txdesc      *txd;
        struct arge_rxdesc      *rxd;
        int                     error, i;
+       int                     arge_tx_align, arge_rx_align;
+
+       /* Assume 4 byte alignment by default */
+       arge_tx_align = 4;
+       arge_rx_align = 4;
+
+       if (sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE)
+               arge_tx_align = 1;
+       if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE)
+               arge_rx_align = 1;
 
        /* Create parent DMA tag. */
        error = bus_dma_tag_create(
@@ -1775,7 +1836,7 @@ arge_dma_alloc(struct arge_softc *sc)
        /* Create tag for Tx buffers. */
        error = bus_dma_tag_create(
            sc->arge_cdata.arge_parent_tag,     /* parent */
-           sizeof(uint32_t), 0,        /* alignment, boundary */
+           arge_tx_align, 0,           /* alignment, boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
@@ -1793,7 +1854,7 @@ arge_dma_alloc(struct arge_softc *sc)
        /* Create tag for Rx buffers. */
        error = bus_dma_tag_create(
            sc->arge_cdata.arge_parent_tag,     /* parent */
-           ARGE_RX_ALIGN, 0,           /* alignment, boundary */
+           arge_rx_align, 0,           /* alignment, boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
@@ -2108,6 +2169,11 @@ arge_newbuf(struct arge_softc *sc, int i
        if (m == NULL)
                return (ENOBUFS);
        m->m_len = m->m_pkthdr.len = MCLBYTES;
+
+       /*
+        * Add extra space to "adjust" (copy) the packet back to be aligned
+        * for purposes of IPv4/IPv6 header contents.
+        */
        m_adj(m, sizeof(uint64_t));
 
        if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag,
@@ -2126,7 +2192,8 @@ arge_newbuf(struct arge_softc *sc, int i
        sc->arge_cdata.arge_rx_sparemap = map;
        rxd->rx_m = m;
        desc = rxd->desc;
-       if (segs[0].ds_addr & 3)
+       if ((sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) &&
+           segs[0].ds_addr & 3)
                panic("RX packet address unaligned");
        desc->packet_addr = segs[0].ds_addr;
        desc->packet_ctrl = ARGE_DESC_EMPTY | ARGE_DMASIZE(segs[0].ds_len);
@@ -2331,10 +2398,12 @@ arge_intr_filter(void *arg)
        if (status & DMA_INTR_ALL) {
                sc->arge_intr_status |= status;
                ARGE_WRITE(sc, AR71XX_DMA_INTR, 0);
+               sc->stats.intr_ok++;
                return (FILTER_SCHEDULE_THREAD);
        }
 
        sc->arge_intr_status = 0;
+       sc->stats.intr_stray++;
        return (FILTER_STRAY);
 }
 
@@ -2355,8 +2424,10 @@ arge_intr(void *arg)
        /*
         * Is it our interrupt at all?
         */
-       if (status == 0)
+       if (status == 0) {
+               sc->stats.intr_stray2++;
                return;
+       }
 
        if (status & DMA_INTR_RX_BUS_ERROR) {
                ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR);

Modified: head/sys/mips/atheros/if_argevar.h
==============================================================================
--- head/sys/mips/atheros/if_argevar.h  Sat Oct 17 22:41:30 2015        
(r289475)
+++ head/sys/mips/atheros/if_argevar.h  Sun Oct 18 00:59:28 2015        
(r289476)
@@ -37,7 +37,10 @@
 #define        ARGE_TX_DMA_SIZE        ARGE_TX_RING_COUNT * sizeof(struct 
arge_desc)
 #define        ARGE_MAXFRAGS           8
 #define ARGE_RING_ALIGN                sizeof(struct arge_desc)
-#define ARGE_RX_ALIGN          sizeof(uint32_t)
+#define ARGE_RX_ALIGN_4BYTE    sizeof(uint32_t)
+#define ARGE_RX_ALIGN_1BYTE    sizeof(char)
+#define ARGE_TX_ALIGN_4BYTE    sizeof(uint32_t)
+#define ARGE_TX_ALIGN_1BYTE    sizeof(char)
 #define ARGE_MAXFRAGS          8
 #define        ARGE_TX_RING_ADDR(sc, i)        \
     ((sc)->arge_rdata.arge_tx_ring_paddr + sizeof(struct arge_desc) * (i))
@@ -149,6 +152,22 @@ struct arge_pll_data {
        uint32_t pll_1000;
 };
 
+/*
+ * Hardware specific behaviours.
+ */
+
+/*
+ * Older chips support 4 byte only transmit and receive
+ * addresses.
+ *
+ * Later chips support arbitrary TX addresses and, later still,
+ * arbitrary RX addresses.
+ */
+#define        ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE 0x00000001
+#define        ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE 0x00000002
+#define        ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE 0x00000004
+#define        ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE 0x00000008
+
 struct arge_softc {
        struct ifnet            *arge_ifp;      /* interface info */
        device_t                arge_dev;
@@ -180,13 +199,20 @@ struct arge_softc {
        uint32_t                arge_intr_status;
        int                     arge_mac_unit;
        int                     arge_if_flags;
+       uint32_t                arge_hw_flags;
        uint32_t                arge_debug;
        uint32_t                arge_mdiofreq;
        struct {
                uint32_t        tx_pkts_unaligned;
+               uint32_t        tx_pkts_unaligned_start;
+               uint32_t        tx_pkts_unaligned_len;
+               uint32_t        tx_pkts_nosegs;
                uint32_t        tx_pkts_aligned;
                uint32_t        rx_overflow;
                uint32_t        tx_underflow;
+               uint32_t        intr_stray;
+               uint32_t        intr_stray2;
+               uint32_t        intr_ok;
        } stats;
 };
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to