On Monday, October 15, 2012 10:10:40 am Alexander V. Chernikov wrote:
> On 13.10.2012 23:24, Jack Vogel wrote:
> > On Sat, Oct 13, 2012 at 11:22 AM, Luigi Rizzo<ri...@iet.unipi.it>  wrote:
> 
> >>
> >> one option could be (same as it is done in the timer
> >> routine in dummynet) to build a list of all the packets
> >> that need to be sent to if_input(), and then call
> >> if_input with the entire list outside the lock.
> >>
> >> It would be even easier if we modify the various *_input()
> >> routines to handle a list of mbufs instead of just one.
> 
> Bulk processing is generally a good idea we probably should implement.
> Probably starting from driver queue ending with marked mbufs 
> (OURS/forward/legacy processing (appletalk and similar))?
> 
> This can minimize an impact for all
> locks on RX side:
> L2
> * rx PFIL hook
> L3 (both IPv4 and IPv6)
> * global IF_ADDR_RLOCK (currently commented out)
> * Per-interface ADDR_RLOCK
> * PFIL hook
> 
>  From the first glance, there can be problems with:
> * Increased latency (we should have some kind of rx_process_limit), but 
> still
> * reader locks being acquired for much longer amount of time
> 
> >>
> >> cheers
> >> luigi
> >>
> >> Very interesting idea Luigi, will have to get that some thought.
> >
> > Jack
> 
> Returning to original post topic:
> 
> Given
> 1) we are currently binding ixgbe ithreads to CPU cores
> 2) RX queue lock is used by (indirectly) in only 2 places:
> a) ISR routine (msix or legacy irq)
> b) taskqueue routine which is scheduled if some packets remains in RX 
> queue and rx_process_limit ended OR we need something to TX
> 
> 3) in practice taskqueue routine is a nightmare for many people since 
> there is no way to stop "kernel {ix0 que}" thread eating 100% cpu after 
> some traffic burst happens: once it is called it starts to schedule 
> itself more and more replacing original ISR routine. Additionally, 
> increasing rx_process_limit does not help since taskqueue is called with 
> the same limit. Finally, currently netisr taskq threads are not bound to 
> any CPU which makes the process even more uncontrollable.

I think part of the problem here is that the taskqueue in ixgbe(4) is
bogusly rescheduled for TX handling.  Instead, ixgbe_msix_que() should
just start transmitting packets directly.

I fixed this in igb(4) here:

http://svnweb.freebsd.org/base?view=revision&revision=233708

You can try this for ixgbe(4).  It also comments out a spurious taskqueue 
reschedule from the watchdog handler that might also lower the taskqueue 
usage.  You can try changing that #if 0 to an #if 1 to test just the txeof 
changes:

Index: ixgbe.c
===================================================================
--- ixgbe.c     (revision 241579)
+++ ixgbe.c     (working copy)
@@ -149,7 +149,7 @@
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
-static bool    ixgbe_txeof(struct tx_ring *);
+static void    ixgbe_txeof(struct tx_ring *);
 static bool    ixgbe_rxeof(struct ix_queue *, int);
 static void    ixgbe_rx_checksum(u32, struct mbuf *, u32);
 static void     ixgbe_set_promisc(struct adapter *);
@@ -1439,8 +1439,9 @@
        struct adapter  *adapter = que->adapter;
        struct ixgbe_hw *hw = &adapter->hw;
        struct          tx_ring *txr = adapter->tx_rings;
-       bool            more_tx, more_rx;
-       u32             reg_eicr, loop = MAX_LOOP;
+       struct ifnet    *ifp = adapter->ifp;
+       bool            more_rx;
+       u32             reg_eicr;
 
 
        reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
@@ -1454,14 +1455,16 @@
        more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
        IXGBE_TX_LOCK(txr);
-       do {
-               more_tx = ixgbe_txeof(txr);
-       } while (loop-- && more_tx);
+       ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+       if (!drbr_empty(ifp, txr->br))
+               ixgbe_mq_start_locked(ifp, txr, NULL);
+#else
+       if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+               ixgbe_start_locked(txr, ifp);
+#endif
        IXGBE_TX_UNLOCK(txr);
 
-       if (more_rx || more_tx)
-               taskqueue_enqueue(que->tq, &que->que_task);
-
        /* Check for fan failure */
        if ((hw->phy.media_type == ixgbe_media_type_copper) &&
            (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1474,7 +1477,10 @@
        if (reg_eicr & IXGBE_EICR_LSC)
                taskqueue_enqueue(adapter->tq, &adapter->link_task);
 
-       ixgbe_enable_intr(adapter);
+       if (more_rx)
+               taskqueue_enqueue(que->tq, &que->que_task);
+       else
+               ixgbe_enable_intr(adapter);
        return;
 }
 
@@ -1491,7 +1497,8 @@
        struct adapter  *adapter = que->adapter;
        struct tx_ring  *txr = que->txr;
        struct rx_ring  *rxr = que->rxr;
-       bool            more_tx, more_rx;
+       struct ifnet    *ifp = adapter->ifp;
+       bool            more_rx;
        u32             newitr = 0;
 
        ixgbe_disable_queue(adapter, que->msix);
@@ -1500,18 +1507,14 @@
        more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
        IXGBE_TX_LOCK(txr);
-       more_tx = ixgbe_txeof(txr);
-       /*
-       ** Make certain that if the stack 
-       ** has anything queued the task gets
-       ** scheduled to handle it.
-       */
-#if __FreeBSD_version < 800000
-       if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+       ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+       if (!drbr_empty(ifp, txr->br))
+               ixgbe_mq_start_locked(ifp, txr, NULL);
 #else
-       if (!drbr_empty(adapter->ifp, txr->br))
+       if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+               ixgbe_start_locked(txr, ifp);
 #endif
-               more_tx = 1;
        IXGBE_TX_UNLOCK(txr);
 
        /* Do AIM now? */
@@ -1565,7 +1568,7 @@
         rxr->packets = 0;
 
 no_calc:
-       if (more_tx || more_rx)
+       if (more_rx)
                taskqueue_enqueue(que->tq, &que->que_task);
        else /* Reenable this interrupt */
                ixgbe_enable_queue(adapter, que->msix);
@@ -2049,8 +2052,10 @@
                        ++hung;
                if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
                        ++busy;
+#if 0
                if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
                        taskqueue_enqueue(que->tq, &que->que_task);
+#endif
         }
        /* Only truely watchdog if all queues show hung */
         if (hung == adapter->num_queues)
@@ -3548,7 +3556,7 @@
  *  tx_buffer is put back on the free queue.
  *
  **********************************************************************/
-static bool
+static void
 ixgbe_txeof(struct tx_ring *txr)
 {
        struct adapter  *adapter = txr->adapter;
@@ -3597,13 +3605,13 @@
                        IXGBE_CORE_UNLOCK(adapter);
                        IXGBE_TX_LOCK(txr);
                }
-               return FALSE;
+               return;
        }
 #endif /* DEV_NETMAP */
 
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = IXGBE_QUEUE_IDLE;
-               return FALSE;
+               return;
        }
 
        processed = 0;
@@ -3613,7 +3621,7 @@
        tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
        last = tx_buffer->eop_index;
        if (last == -1)
-               return FALSE;
+               return;
        eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
 
        /*
@@ -3693,12 +3701,8 @@
        if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
                txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
 
-       if (txr->tx_avail == adapter->num_tx_desc) {
+       if (txr->tx_avail == adapter->num_tx_desc)
                txr->queue_status = IXGBE_QUEUE_IDLE;
-               return (FALSE);
-       }
-
-       return TRUE;
 }
 
 /*********************************************************************

-- 
John Baldwin
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to