On 04/12/15(Fri) 12:47, Mark Kettenis wrote:
> Here is a new diff to make ix(4) mpsafe.  Should no longer get stuck
> in the OACTIVE state.  Tests more than welcome.

Like for em(4) it is the right time to put this in,  ok mpi@

> Index: if_ix.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 if_ix.c
> --- if_ix.c   25 Nov 2015 03:09:59 -0000      1.129
> +++ if_ix.c   3 Dec 2015 16:50:26 -0000
> @@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
>       sc->osdep.os_sc = sc;
>       sc->osdep.os_pa = *pa;
>  
> -     mtx_init(&sc->rx_mtx, IPL_NET);
> -
>       /* Set up the timer callout */
>       timeout_set(&sc->timer, ixgbe_local_timer, sc);
>       timeout_set(&sc->rx_refill, ixgbe_rxrefill, sc);
> @@ -385,6 +383,16 @@ ixgbe_start(struct ifnet * ifp)
>               if (ixgbe_encap(txr, m_head)) {
>                       ifq_deq_rollback(&ifp->if_snd, m_head);
>                       ifq_set_oactive(&ifp->if_snd);
> +                     /*
> +                      *  Make sure there are still packets on the
> +                      *  ring.  The interrupt handler may have
> +                      *  cleaned up the ring before we were able to
> +                      *  set the IF_OACTIVE flag.
> +                      */
> +                     if (txr->tx_avail == sc->num_tx_desc) {
> +                             ifq_clr_oactive(&ifp->if_snd);
> +                             continue;
> +                     }
>                       break;
>               }
>  
> @@ -527,10 +535,7 @@ ixgbe_watchdog(struct ifnet * ifp)
>  
>       /*
>        * The timer is set to 5 every time ixgbe_start() queues a packet.
> -      * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
> -      * least one descriptor.
> -      * Finally, anytime all descriptors are clean the timer is
> -      * set to 0.
> +      * Anytime all descriptors are clean the timer is set to 0.
>        */
>       for (i = 0; i < sc->num_queues; i++, txr++) {
>               if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
> @@ -862,7 +867,7 @@ ixgbe_intr(void *arg)
>       struct tx_ring  *txr = sc->tx_rings;
>       struct ixgbe_hw *hw = &sc->hw;
>       uint32_t         reg_eicr;
> -     int              i, refill = 0, was_active = 0;
> +     int              i, refill = 0;
>  
>       reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
>       if (reg_eicr == 0) {
> @@ -872,11 +877,8 @@ ixgbe_intr(void *arg)
>  
>       if (ISSET(ifp->if_flags, IFF_RUNNING)) {
>               ixgbe_rxeof(que);
> -             refill = 1;
> -
> -             if (ifq_is_oactive(&ifp->if_snd))
> -                     was_active = 1;
>               ixgbe_txeof(txr);
> +             refill = 1;
>       }
>  
>       if (refill) {
> @@ -892,6 +894,8 @@ ixgbe_intr(void *arg)
>       if (reg_eicr & IXGBE_EICR_LSC) {
>               KERNEL_LOCK();
>               ixgbe_update_link_status(sc);
> +             if (!IFQ_IS_EMPTY(&ifp->if_snd))
> +                     ixgbe_start(ifp);
>               KERNEL_UNLOCK();
>       }
>  
> @@ -913,12 +917,6 @@ ixgbe_intr(void *arg)
>               }
>       }
>  
> -     if (was_active) {
> -             KERNEL_LOCK();
> -             ixgbe_start(ifp);
> -             KERNEL_UNLOCK();
> -     }
> -
>       /* Check for fan failure */
>       if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
>           (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
> @@ -1061,17 +1059,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
>               cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
>  #endif
>  
> -     /*
> -      * Force a cleanup if number of TX descriptors
> -      * available is below the threshold. If it fails
> -      * to get above, then abort transmit.
> -      */
> -     if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
> -             ixgbe_txeof(txr);
> -             /* Make sure things have improved */
> -             if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> -                     return (ENOBUFS);
> -     }
> +     /* Check that we have at least the minimal number of TX descriptors. */
> +     if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> +             return (ENOBUFS);
>  
>       /*
>        * Important to capture the first descriptor
> @@ -1135,8 +1125,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
>  
>       txd->read.cmd_type_len |=
>           htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
> -     txr->tx_avail -= map->dm_nsegs;
> -     txr->next_avail_desc = i;
>  
>       txbuf->m_head = m_head;
>       /*
> @@ -1154,6 +1142,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
>       txbuf = &txr->tx_buffers[first];
>       txbuf->eop_index = last;
>  
> +     membar_producer();
> +
> +     atomic_sub_int(&txr->tx_avail, map->dm_nsegs);
> +     txr->next_avail_desc = i;
> +
>       ++txr->tx_packets;
>       return (0);
>  
> @@ -1322,6 +1315,10 @@ ixgbe_stop(void *arg)
>       /* reprogram the RAR[0] in case user changed it. */
>       ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
>  
> +     intr_barrier(sc->tag);
> +
> +     KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
> +
>       /* Should we really clear all structures on stop? */
>       ixgbe_free_transmit_structures(sc);
>       ixgbe_free_receive_structures(sc);
> @@ -2202,11 +2199,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
>       tx_buffer->m_head = NULL;
>       tx_buffer->eop_index = -1;
>  
> +     membar_producer();
> +
>       /* We've consumed the first desc, adjust counters */
>       if (++ctxd == sc->num_tx_desc)
>               ctxd = 0;
>       txr->next_avail_desc = ctxd;
> -     --txr->tx_avail;
> +     atomic_dec_int(&txr->tx_avail);
>  
>       return (0);
>  }
> @@ -2320,10 +2319,12 @@ ixgbe_tso_setup(struct tx_ring *txr, str
>  
>       TXD->seqnum_seed = htole32(0);
>  
> +     membar_producer();
> +
>       if (++ctxd == sc->num_tx_desc)
>               ctxd = 0;
>  
> -     txr->tx_avail--;
> +     atomic_dec_int(&txr->tx_avail);
>       txr->next_avail_desc = ctxd;
>       *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
>       *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
> @@ -2345,6 +2346,7 @@ ixgbe_txeof(struct tx_ring *txr)
>       struct ix_softc                 *sc = txr->sc;
>       struct ifnet                    *ifp = &sc->arpcom.ac_if;
>       uint32_t                         first, last, done, processed;
> +     uint32_t                         num_avail;
>       struct ixgbe_tx_buf             *tx_buffer;
>       struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
>  
> @@ -2356,23 +2358,19 @@ ixgbe_txeof(struct tx_ring *txr)
>               return FALSE;
>       }
>  
> -     KERNEL_LOCK();
> +     membar_consumer();
>  
>       processed = 0;
>       first = txr->next_to_clean;
>       /* was the txt queue cleaned up in the meantime */
> -     if (txr->tx_buffers == NULL) {
> -             KERNEL_UNLOCK();
> +     if (txr->tx_buffers == NULL)
>               return FALSE;
> -     }
>       tx_buffer = &txr->tx_buffers[first];
>       /* For cleanup we just use legacy struct */
>       tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
>       last = tx_buffer->eop_index;
> -     if (last == -1) {
> -             KERNEL_UNLOCK();
> +     if (last == -1)
>               return FALSE;
> -     }
>       eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
>  
>       /*
> @@ -2394,7 +2392,6 @@ ixgbe_txeof(struct tx_ring *txr)
>                       tx_desc->upper.data = 0;
>                       tx_desc->lower.data = 0;
>                       tx_desc->buffer_addr = 0;
> -                     ++txr->tx_avail;
>                       ++processed;
>  
>                       if (tx_buffer->m_head) {
> @@ -2436,31 +2433,24 @@ ixgbe_txeof(struct tx_ring *txr)
>  
>       txr->next_to_clean = first;
>  
> +     num_avail = atomic_add_int_nv(&txr->tx_avail, processed);
> +
> +     /* All clean, turn off the timer */
> +     if (num_avail == sc->num_tx_desc)
> +             ifp->if_timer = 0;
> +
>       /*
>        * If we have enough room, clear IFF_OACTIVE to tell the stack that
> -      * it is OK to send packets. If there are no pending descriptors,
> -      * clear the timeout. Otherwise, if some descriptors have been freed,
> -      * restart the timeout.
> +      * it is OK to send packets.
>        */
> -     if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
> +     if (ifq_is_oactive(&ifp->if_snd) &&
> +         num_avail > IXGBE_TX_OP_THRESHOLD) {
>               ifq_clr_oactive(&ifp->if_snd);
> -
> -             /* If all are clean turn off the timer */
> -             if (txr->tx_avail == sc->num_tx_desc) {
> -                     ifp->if_timer = 0;
> -                     txr->watchdog_timer = 0;
> -                     KERNEL_UNLOCK();
> -                     return FALSE;
> -             }
> -             /* Some were cleaned, so reset timer */
> -             else if (processed) {
> -                     ifp->if_timer = IXGBE_TX_TIMEOUT;
> -                     txr->watchdog_timer = IXGBE_TX_TIMEOUT;
> -             }
> +             KERNEL_LOCK();
> +             ixgbe_start(ifp);
> +             KERNEL_UNLOCK();
>       }
>  
> -     KERNEL_UNLOCK();
> -
>       return TRUE;
>  }
>  
> @@ -2610,8 +2600,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
>       u_int            slots;
>       int              i;
>  
> -     mtx_enter(&sc->rx_mtx);
> -
>       i = rxr->last_desc_filled;
>       for (slots = if_rxr_get(&rxr->rx_ring, sc->num_rx_desc);
>           slots > 0; slots--) {
> @@ -2627,8 +2615,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
>  
>       if_rxr_put(&rxr->rx_ring, slots);
>  
> -     mtx_leave(&sc->rx_mtx);
> -
>       return (post);
>  }
>  
> @@ -2796,10 +2782,8 @@ ixgbe_free_receive_structures(struct ix_
>       struct rx_ring *rxr;
>       int             i;
>  
> -     mtx_enter(&sc->rx_mtx);
>       for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
>               if_rxr_init(&rxr->rx_ring, 0, 0);
> -     mtx_leave(&sc->rx_mtx);
>  
>       for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
>               ixgbe_free_receive_buffers(rxr);
> @@ -2853,7 +2837,6 @@ ixgbe_rxeof(struct ix_queue *que)
>       struct rx_ring          *rxr = que->rxr;
>       struct ifnet            *ifp = &sc->arpcom.ac_if;
>       struct mbuf_list         ml = MBUF_LIST_INITIALIZER();
> -     struct mbuf_list         free_ml = MBUF_LIST_INITIALIZER();
>       struct mbuf             *mp, *sendmp;
>       uint8_t                  eop = 0;
>       uint16_t                 len, vtag;
> @@ -2866,7 +2849,6 @@ ixgbe_rxeof(struct ix_queue *que)
>       if (!ISSET(ifp->if_flags, IFF_RUNNING))
>               return FALSE;
>  
> -     mtx_enter(&sc->rx_mtx);
>       i = rxr->next_to_check;
>       while (if_rxr_inuse(&rxr->rx_ring) > 0) {
>               bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
> @@ -2901,11 +2883,11 @@ ixgbe_rxeof(struct ix_queue *que)
>                       sc->dropped_pkts++;
>  
>                       if (rxbuf->fmp) {
> -                             ml_enqueue(&free_ml, rxbuf->fmp);
> +                             m_freem(rxbuf->fmp);
>                               rxbuf->fmp = NULL;
>                       }
>  
> -                     ml_enqueue(&free_ml, mp);
> +                     m_freem(mp);
>                       rxbuf->buf = NULL;
>                       goto next_desc;
>               }
> @@ -2983,9 +2965,6 @@ next_desc:
>                       i = 0;
>       }
>       rxr->next_to_check = i;
> -     mtx_leave(&sc->rx_mtx);
> -
> -     ml_purge(&free_ml);
>  
>       if_input(ifp, &ml);
>  
> Index: if_ix.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.h,v
> retrieving revision 1.29
> diff -u -p -r1.29 if_ix.h
> --- if_ix.h   11 Sep 2015 13:02:28 -0000      1.29
> +++ if_ix.h   3 Dec 2015 16:50:26 -0000
> @@ -77,10 +77,9 @@
>  #define IXGBE_TX_TIMEOUT                   5 /* set to 5 seconds */
>  
>  /*
> - * This parameters control when the driver calls the routine to reclaim
> - * transmit descriptors.
> + * This parameter controls the minimum number of available transmit
> + * descriptors needed before we attempt transmission of a packet.
>   */
> -#define IXGBE_TX_CLEANUP_THRESHOLD   (sc->num_tx_desc / 16)
>  #define IXGBE_TX_OP_THRESHOLD                (sc->num_tx_desc / 32)
>  
>  #define IXGBE_MAX_FRAME_SIZE 9216
> @@ -170,7 +169,7 @@ struct tx_ring {
>       union ixgbe_adv_tx_desc *tx_base;
>       struct ixgbe_tx_buf     *tx_buffers;
>       struct ixgbe_dma_alloc  txdma;
> -     volatile uint16_t       tx_avail;
> +     volatile uint32_t       tx_avail;
>       uint32_t                next_avail_desc;
>       uint32_t                next_to_clean;
>       enum {
> @@ -277,7 +276,6 @@ struct ix_softc {
>        * Receive rings:
>        *      Allocated at run time, an array of rings.
>        */
> -     struct mutex            rx_mtx;
>       struct rx_ring          *rx_rings;
>       uint64_t                que_mask;
>       int                     num_rx_desc;
> Index: ixgbe.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/ixgbe.h,v
> retrieving revision 1.20
> diff -u -p -r1.20 ixgbe.h
> --- ixgbe.h   24 Nov 2015 17:11:40 -0000      1.20
> +++ ixgbe.h   3 Dec 2015 16:50:26 -0000
> @@ -52,6 +52,7 @@
>  #include <sys/timeout.h>
>  #include <sys/pool.h>
>  #include <sys/rwlock.h>
> +#include <sys/atomic.h>
>  
>  #include <net/if.h>
>  #include <net/bpf.h>
> 

Reply via email to