Author: yongari
Date: Wed Nov 16 19:25:26 2011
New Revision: 227582
URL: http://svn.freebsd.org/changeset/base/227582

Log:
  Enable 64bit DMA addressing support for all msk(4) controllers.
  The unnecessarily complex LE (list element) format used by Marvell
  controllers was the main reason 64bit DMA addressing had not been
  enabled in the driver.  Whenever the high 32 bits of the DMA address
  of a TX/RX buffer change, the driver has to generate a new LE.  In
  the TX path, the driver keeps track of the most recently used high
  32 bits of the DMA address and generates a new LE whenever it sees
  them change.  In the RX path, the driver always uses two LEs to
  specify the 64bit DMA address of an RX buffer.  Strictly, a second
  LE is not needed when the high 32 bits match those of the previous
  RX buffer, but the driver uses two LEs anyway to keep RX ring
  management simple.
  
  One drawback of switching to 64bit DMA addressing is that a large
  number of LEs are consumed specifying 64bit DMA addresses, so the
  number of LEs available for TX/RX buffers is considerably reduced.
  To mitigate the issue, increase the number of LEs from 256 to 384
  for TX and from 256 to 512 for RX.  On 32bit architectures, msk(4)
  does not use 64bit DMA addressing, to save resources.
  
  Tested by:    das

Modified:
  head/sys/dev/msk/if_msk.c
  head/sys/dev/msk/if_mskreg.h

Modified: head/sys/dev/msk/if_msk.c
==============================================================================
--- head/sys/dev/msk/if_msk.c   Wed Nov 16 19:06:55 2011        (r227581)
+++ head/sys/dev/msk/if_msk.c   Wed Nov 16 19:25:26 2011        (r227582)
@@ -700,7 +700,7 @@ msk_init_rx_ring(struct msk_if_softc *sc
 {
        struct msk_ring_data *rd;
        struct msk_rxdesc *rxd;
-       int i, prod;
+       int i, nbuf, prod;
 
        MSK_IF_LOCK_ASSERT(sc_if);
 
@@ -710,11 +710,18 @@ msk_init_rx_ring(struct msk_if_softc *sc
 
        rd = &sc_if->msk_rdata;
        bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT);
-       prod = sc_if->msk_cdata.msk_rx_prod;
-       i = 0;
+       for (i = prod = 0; i < MSK_RX_RING_CNT; i++) {
+               rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
+               rxd->rx_m = NULL;
+               rxd->rx_le = &rd->msk_rx_ring[prod];
+               MSK_INC(prod, MSK_RX_RING_CNT);
+       }
+       nbuf = MSK_RX_BUF_CNT;
+       prod = 0;
        /* Have controller know how to compute Rx checksum. */
        if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 &&
            (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+#ifdef MSK_64BIT_DMA
                rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
                rxd->rx_m = NULL;
                rxd->rx_le = &rd->msk_rx_ring[prod];
@@ -723,15 +730,21 @@ msk_init_rx_ring(struct msk_if_softc *sc
                rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
                MSK_INC(prod, MSK_RX_RING_CNT);
                MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
-               i++;
-       }
-       for (; i < MSK_RX_RING_CNT; i++) {
+#endif
                rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
                rxd->rx_m = NULL;
                rxd->rx_le = &rd->msk_rx_ring[prod];
+               rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 |
+                   ETHER_HDR_LEN);
+               rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
+               MSK_INC(prod, MSK_RX_RING_CNT);
+               MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
+               nbuf--;
+       }
+       for (i = 0; i < nbuf; i++) {
                if (msk_newbuf(sc_if, prod) != 0)
                        return (ENOBUFS);
-               MSK_INC(prod, MSK_RX_RING_CNT);
+               MSK_RX_INC(prod, MSK_RX_RING_CNT);
        }
 
        bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag,
@@ -739,10 +752,11 @@ msk_init_rx_ring(struct msk_if_softc *sc
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
        /* Update prefetch unit. */
-       sc_if->msk_cdata.msk_rx_prod = MSK_RX_RING_CNT - 1;
+       sc_if->msk_cdata.msk_rx_prod = prod;
        CSR_WRITE_2(sc_if->msk_softc,
            Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG),
-           sc_if->msk_cdata.msk_rx_prod);
+           (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) %
+           MSK_RX_RING_CNT);
        if (msk_rx_fill(sc_if, 0) != 0)
                return (ENOBUFS);
        return (0);
@@ -753,7 +767,7 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
 {
        struct msk_ring_data *rd;
        struct msk_rxdesc *rxd;
-       int i, prod;
+       int i, nbuf, prod;
 
        MSK_IF_LOCK_ASSERT(sc_if);
 
@@ -764,11 +778,18 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
        rd = &sc_if->msk_rdata;
        bzero(rd->msk_jumbo_rx_ring,
            sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT);
-       prod = sc_if->msk_cdata.msk_rx_prod;
-       i = 0;
+       for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) {
+               rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
+               rxd->rx_m = NULL;
+               rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
+               MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+       }
+       nbuf = MSK_RX_BUF_CNT;
+       prod = 0;
        /* Have controller know how to compute Rx checksum. */
        if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 &&
            (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+#ifdef MSK_64BIT_DMA
                rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
                rxd->rx_m = NULL;
                rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
@@ -777,25 +798,33 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
                rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
                MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
                MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
-               i++;
-       }
-       for (; i < MSK_JUMBO_RX_RING_CNT; i++) {
+#endif
                rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
                rxd->rx_m = NULL;
                rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
+               rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 |
+                   ETHER_HDR_LEN);
+               rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
+               MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+               MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
+               nbuf--;
+       }
+       for (i = 0; i < nbuf; i++) {
                if (msk_jumbo_newbuf(sc_if, prod) != 0)
                        return (ENOBUFS);
-               MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+               MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT);
        }
 
        bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag,
            sc_if->msk_cdata.msk_jumbo_rx_ring_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-       sc_if->msk_cdata.msk_rx_prod = MSK_JUMBO_RX_RING_CNT - 1;
+       /* Update prefetch unit. */
+       sc_if->msk_cdata.msk_rx_prod = prod;
        CSR_WRITE_2(sc_if->msk_softc,
            Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG),
-           sc_if->msk_cdata.msk_rx_prod);
+           (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) %
+           MSK_JUMBO_RX_RING_CNT);
        if (msk_rx_fill(sc_if, 1) != 0)
                return (ENOBUFS);
        return (0);
@@ -813,6 +842,7 @@ msk_init_tx_ring(struct msk_if_softc *sc
        sc_if->msk_cdata.msk_tx_prod = 0;
        sc_if->msk_cdata.msk_tx_cons = 0;
        sc_if->msk_cdata.msk_tx_cnt = 0;
+       sc_if->msk_cdata.msk_tx_high_addr = 0;
 
        rd = &sc_if->msk_rdata;
        bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT);
@@ -834,6 +864,12 @@ msk_discard_rxbuf(struct msk_if_softc *s
        struct msk_rxdesc *rxd;
        struct mbuf *m;
 
+#ifdef MSK_64BIT_DMA
+       rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+       rx_le = rxd->rx_le;
+       rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+       MSK_INC(idx, MSK_RX_RING_CNT);
+#endif
        rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
        m = rxd->rx_m;
        rx_le = rxd->rx_le;
@@ -847,6 +883,12 @@ msk_discard_jumbo_rxbuf(struct msk_if_so
        struct msk_rxdesc *rxd;
        struct mbuf *m;
 
+#ifdef MSK_64BIT_DMA
+       rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+       rx_le = rxd->rx_le;
+       rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+       MSK_INC(idx, MSK_JUMBO_RX_RING_CNT);
+#endif
        rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
        m = rxd->rx_m;
        rx_le = rxd->rx_le;
@@ -884,10 +926,18 @@ msk_newbuf(struct msk_if_softc *sc_if, i
        KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
        rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+#ifdef MSK_64BIT_DMA
+       rx_le = rxd->rx_le;
+       rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr));
+       rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+       MSK_INC(idx, MSK_RX_RING_CNT);
+       rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+#endif
        if (rxd->rx_m != NULL) {
                bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap,
                    BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap);
+               rxd->rx_m = NULL;
        }
        map = rxd->rx_dmamap;
        rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap;
@@ -937,11 +987,19 @@ msk_jumbo_newbuf(struct msk_if_softc *sc
        KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
        rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+#ifdef MSK_64BIT_DMA
+       rx_le = rxd->rx_le;
+       rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr));
+       rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+       MSK_INC(idx, MSK_JUMBO_RX_RING_CNT);
+       rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+#endif
        if (rxd->rx_m != NULL) {
                bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag,
                    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag,
                    rxd->rx_dmamap);
+               rxd->rx_m = NULL;
        }
        map = rxd->rx_dmamap;
        rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap;
@@ -1472,7 +1530,7 @@ mskc_reset(struct msk_softc *sc)
 
        /* Clear status list. */
        bzero(sc->msk_stat_ring,
-           sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT);
+           sizeof(struct msk_stat_desc) * sc->msk_stat_count);
        sc->msk_stat_cons = 0;
        bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -1483,7 +1541,7 @@ mskc_reset(struct msk_softc *sc)
        CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr));
        CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr));
        /* Set the status list last index. */
-       CSR_WRITE_2(sc, STAT_LAST_IDX, MSK_STAT_RING_CNT - 1);
+       CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1);
        if (sc->msk_hw_id == CHIP_ID_YUKON_EC &&
            sc->msk_hw_rev == CHIP_REV_YU_EC_A1) {
                /* WA for dev. #4.3 */
@@ -2083,17 +2141,29 @@ static int
 msk_status_dma_alloc(struct msk_softc *sc)
 {
        struct msk_dmamap_arg ctx;
-       int error;
+       bus_size_t stat_sz;
+       int count, error;
 
+       /*
+        * It seems controller requires number of status LE entries
+        * is power of 2 and the maximum number of status LE entries
+        * is 4096.  For dual-port controllers, the number of status
+        * LE entries should be large enough to hold both port's
+        * status updates.
+        */
+       count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT;
+       count = imin(4096, roundup2(count, 1024));
+       sc->msk_stat_count = count;
+       stat_sz = count * sizeof(struct msk_stat_desc);
        error = bus_dma_tag_create(
                    bus_get_dma_tag(sc->msk_dev),       /* parent */
                    MSK_STAT_ALIGN, 0,          /* alignment, boundary */
                    BUS_SPACE_MAXADDR,          /* lowaddr */
                    BUS_SPACE_MAXADDR,          /* highaddr */
                    NULL, NULL,                 /* filter, filterarg */
-                   MSK_STAT_RING_SZ,           /* maxsize */
+                   stat_sz,                    /* maxsize */
                    1,                          /* nsegments */
-                   MSK_STAT_RING_SZ,           /* maxsegsize */
+                   stat_sz,                    /* maxsegsize */
                    0,                          /* flags */
                    NULL, NULL,                 /* lockfunc, lockarg */
                    &sc->msk_stat_tag);
@@ -2114,9 +2184,8 @@ msk_status_dma_alloc(struct msk_softc *s
        }
 
        ctx.msk_busaddr = 0;
-       error = bus_dmamap_load(sc->msk_stat_tag,
-           sc->msk_stat_map, sc->msk_stat_ring, MSK_STAT_RING_SZ,
-           msk_dmamap_cb, &ctx, 0);
+       error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map,
+           sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
        if (error != 0) {
                device_printf(sc->msk_dev,
                    "failed to load DMA'able memory for status ring\n");
@@ -2157,27 +2226,10 @@ msk_txrx_dma_alloc(struct msk_if_softc *
        int error, i;
 
        /* Create parent DMA tag. */
-       /*
-        * XXX
-        * It seems that Yukon II supports full 64bits DMA operations. But
-        * it needs two descriptors(list elements) for 64bits DMA operations.
-        * Since we don't know what DMA address mappings(32bits or 64bits)
-        * would be used in advance for each mbufs, we limits its DMA space
-        * to be in range of 32bits address space. Otherwise, we should check
-        * what DMA address is used and chain another descriptor for the
-        * 64bits DMA operation. This also means descriptor ring size is
-        * variable. Limiting DMA address to be in 32bit address space greatly
-        * simplifies descriptor handling and possibly would increase
-        * performance a bit due to efficient handling of descriptors.
-        * Apart from harassing checksum offloading mechanisms, it seems
-        * it's really bad idea to use a separate descriptor for 64bit
-        * DMA operation to save small descriptor memory. Anyway, I've
-        * never seen these exotic scheme on ethernet interface hardware.
-        */
        error = bus_dma_tag_create(
                    bus_get_dma_tag(sc_if->msk_if_dev), /* parent */
                    1, 0,                       /* alignment, boundary */
-                   BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
+                   BUS_SPACE_MAXADDR,          /* lowaddr */
                    BUS_SPACE_MAXADDR,          /* highaddr */
                    NULL, NULL,                 /* filter, filterarg */
                    BUS_SPACE_MAXSIZE_32BIT,    /* maxsize */
@@ -2283,7 +2335,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *
        ctx.msk_busaddr = 0;
        error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag,
            sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring,
-           MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, 0);
+           MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
        if (error != 0) {
                device_printf(sc_if->msk_if_dev,
                    "failed to load DMA'able memory for Tx ring\n");
@@ -2304,7 +2356,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *
        ctx.msk_busaddr = 0;
        error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag,
            sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring,
-           MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, 0);
+           MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
        if (error != 0) {
                device_printf(sc_if->msk_if_dev,
                    "failed to load DMA'able memory for Rx ring\n");
@@ -2421,7 +2473,7 @@ msk_rx_dma_jalloc(struct msk_if_softc *s
        error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag,
            sc_if->msk_cdata.msk_jumbo_rx_ring_map,
            sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ,
-           msk_dmamap_cb, &ctx, 0);
+           msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
        if (error != 0) {
                device_printf(sc_if->msk_if_dev,
                    "failed to load DMA'able memory for jumbo Rx ring\n");
@@ -2781,6 +2833,18 @@ msk_encap(struct msk_if_softc *sc_if, st
                }
        }
 
+#ifdef MSK_64BIT_DMA
+       if (MSK_ADDR_HI(txsegs[0].ds_addr) !=
+           sc_if->msk_cdata.msk_tx_high_addr) {
+               sc_if->msk_cdata.msk_tx_high_addr =
+                   MSK_ADDR_HI(txsegs[0].ds_addr);
+               tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+               tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr));
+               tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+               sc_if->msk_cdata.msk_tx_cnt++;
+               MSK_INC(prod, MSK_TX_RING_CNT);
+       }
+#endif
        si = prod;
        tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
        tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr));
@@ -2795,6 +2859,20 @@ msk_encap(struct msk_if_softc *sc_if, st
 
        for (i = 1; i < nseg; i++) {
                tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+#ifdef MSK_64BIT_DMA
+               if (MSK_ADDR_HI(txsegs[i].ds_addr) !=
+                   sc_if->msk_cdata.msk_tx_high_addr) {
+                       sc_if->msk_cdata.msk_tx_high_addr =
+                           MSK_ADDR_HI(txsegs[i].ds_addr);
+                       tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+                       tx_le->msk_addr =
+                           htole32(MSK_ADDR_HI(txsegs[i].ds_addr));
+                       tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+                       sc_if->msk_cdata.msk_tx_cnt++;
+                       MSK_INC(prod, MSK_TX_RING_CNT);
+                       tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+               }
+#endif
                tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr));
                tx_le->msk_control = htole32(txsegs[i].ds_len | control |
                    OP_BUFFER | HW_OWNER);
@@ -3147,7 +3225,12 @@ msk_rxeof(struct msk_if_softc *sc_if, ui
                        msk_discard_rxbuf(sc_if, cons);
                        break;
                }
+#ifdef MSK_64BIT_DMA
+               rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) %
+                   MSK_RX_RING_CNT];
+#else
                rxd = &sc_if->msk_cdata.msk_rxdesc[cons];
+#endif
                m = rxd->rx_m;
                if (msk_newbuf(sc_if, cons) != 0) {
                        ifp->if_iqdrops++;
@@ -3175,8 +3258,8 @@ msk_rxeof(struct msk_if_softc *sc_if, ui
                MSK_IF_LOCK(sc_if);
        } while (0);
 
-       MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
-       MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT);
+       MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
+       MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT);
 }
 
 static void
@@ -3207,7 +3290,12 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_
                        msk_discard_jumbo_rxbuf(sc_if, cons);
                        break;
                }
+#ifdef MSK_64BIT_DMA
+               jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) %
+                   MSK_JUMBO_RX_RING_CNT];
+#else
                jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons];
+#endif
                m = jrxd->rx_m;
                if (msk_jumbo_newbuf(sc_if, cons) != 0) {
                        ifp->if_iqdrops++;
@@ -3235,8 +3323,8 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_
                MSK_IF_LOCK(sc_if);
        } while (0);
 
-       MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
-       MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT);
+       MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
+       MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT);
 }
 
 static void
@@ -3581,7 +3669,7 @@ msk_handle_events(struct msk_softc *sc)
                            control & STLE_OP_MASK);
                        break;
                }
-               MSK_INC(cons, MSK_STAT_RING_CNT);
+               MSK_INC(cons, sc->msk_stat_count);
                if (rxprog > sc->msk_process_limit)
                        break;
        }

Modified: head/sys/dev/msk/if_mskreg.h
==============================================================================
--- head/sys/dev/msk/if_mskreg.h        Wed Nov 16 19:06:55 2011        (r227581)
+++ head/sys/dev/msk/if_mskreg.h        Wed Nov 16 19:25:26 2011        (r227582)
@@ -2315,35 +2315,48 @@ struct msk_stat_desc {
 #define BMU_UDP_CHECK  (0x57<<16)      /* Descr with UDP ext (YUKON only) */
 #define BMU_BBC                0xffff  /* Bit 15.. 0:  Buffer Byte Counter */
 
+/*
+ * Controller requires an additional LE op code for 64bit DMA operation.
+ * Driver uses fixed number of RX buffers such that this limitation
+ * reduces number of available RX buffers with 64bit DMA so double
+ * number of RX buffers on platforms that support 64bit DMA. For TX
+ * side, controller requires an additional OP_ADDR64 op code if a TX
+ * buffer uses different high address value than previously used one.
+ * Driver monitors high DMA address change in TX and inserts an
+ * OP_ADDR64 op code if the high DMA address is changed.  Driver
+ * allocates 50% more total TX buffers on platforms that support 64bit
+ * DMA.
+ */
+#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
+#define        MSK_64BIT_DMA
+#define MSK_TX_RING_CNT                384
+#define MSK_RX_RING_CNT                512
+#else
+#undef MSK_64BIT_DMA
 #define MSK_TX_RING_CNT                256
 #define MSK_RX_RING_CNT                256
+#endif
 #define        MSK_RX_BUF_ALIGN        8
 #define MSK_JUMBO_RX_RING_CNT  MSK_RX_RING_CNT
-#define        MSK_STAT_RING_CNT       ((1 + 3) * (MSK_TX_RING_CNT + MSK_RX_RING_CNT))
 #define MSK_MAXTXSEGS          32
 #define        MSK_TSO_MAXSGSIZE       4096
 #define        MSK_TSO_MAXSIZE         (65535 + sizeof(struct ether_vlan_header))
 
 /*
- * It seems that the hardware requires extra decriptors(LEs) to offload
- * TCP/UDP checksum, VLAN hardware tag inserstion and TSO.
+ * It seems that the hardware requires extra descriptors(LEs) to offload
+ * TCP/UDP checksum, VLAN hardware tag insertion and TSO.
  *
  * 1 descriptor for TCP/UDP checksum offload.
  * 1 descriptor VLAN hardware tag insertion.
  * 1 descriptor for TSO(TCP Segmentation Offload)
- * 1 descriptor for 64bits DMA : Not applicatable due to the use of
- *  BUS_SPACE_MAXADDR_32BIT in parent DMA tag creation.
+ * 1 descriptor for each 64bits DMA transfers 
  */
+#ifdef MSK_64BIT_DMA
+#define        MSK_RESERVED_TX_DESC_CNT        (MSK_MAXTXSEGS + 3)
+#else
 #define        MSK_RESERVED_TX_DESC_CNT        3
+#endif
 
-/*
- * Jumbo buffer stuff. Note that we must allocate more jumbo
- * buffers than there are descriptors in the receive ring. This
- * is because we don't know how long it will take for a packet
- * to be released after we hand it off to the upper protocol
- * layers. To be safe, we allocate 1.5 times the number of
- * receive descriptors.
- */
 #define MSK_JUMBO_FRAMELEN     9022
 #define MSK_JUMBO_MTU          (MSK_JUMBO_FRAMELEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
 #define MSK_MAX_FRAMELEN               \
@@ -2380,6 +2393,7 @@ struct msk_chain_data {
        bus_dmamap_t            msk_jumbo_rx_sparemap;
        uint16_t                msk_tso_mtu;
        uint32_t                msk_last_csum;
+       uint32_t                msk_tx_high_addr;
        int                     msk_tx_prod;
        int                     msk_tx_cons;
        int                     msk_tx_cnt;
@@ -2411,10 +2425,17 @@ struct msk_ring_data {
     (sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT)
 #define MSK_JUMBO_RX_RING_SZ           \
     (sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT)
-#define MSK_STAT_RING_SZ               \
-    (sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT)
 
 #define MSK_INC(x, y)  (x) = (x + 1) % y
+#ifdef MSK_64BIT_DMA
+#define MSK_RX_INC(x, y)       (x) = (x + 2) % y
+#define MSK_RX_BUF_CNT         (MSK_RX_RING_CNT / 2)
+#define MSK_JUMBO_RX_BUF_CNT   (MSK_JUMBO_RX_RING_CNT / 2)
+#else
+#define MSK_RX_INC(x, y)       (x) = (x + 1) % y
+#define MSK_RX_BUF_CNT         MSK_RX_RING_CNT
+#define MSK_JUMBO_RX_BUF_CNT   MSK_JUMBO_RX_RING_CNT
+#endif
 
 #define        MSK_PCI_BUS     0
 #define        MSK_PCIX_BUS    1
@@ -2519,6 +2540,7 @@ struct msk_softc {
        int                     msk_int_holdoff;
        int                     msk_process_limit;
        int                     msk_stat_cons;
+       int                     msk_stat_count;
        struct mtx              msk_mtx;
 };
 
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to