Author: shurd
Date: Wed May 16 21:03:22 2018
New Revision: 333686

  Work around lack of TX IRQs in iflib for netmap
  When poll() is called via netmap, txsync is called first. If there are
  no buffers available to reclaim, it waits for the driver to notify it
  of new buffers. Since the TX IRQ is generally not used in iflib
  drivers, this wait ends up timing out.
  Work around this by having the reclaim path call DELAY(1) if it is
  initially unable to reclaim anything, and then schedule the tx task,
  which will spin by continuously rescheduling itself until some buffers
  are reclaimed. In general, the delay is enough to allow some buffers to
  be reclaimed, so spinning is minimized.
  Reported by:  Johannes Lundberg <>
  Reviewed by:  sbruno
  Sponsored by: Limelight Networks
  Differential Revision:


Modified: head/sys/net/iflib.c
--- head/sys/net/iflib.c        Wed May 16 20:44:08 2018        (r333685)
+++ head/sys/net/iflib.c        Wed May 16 21:03:22 2018        (r333686)
@@ -960,10 +960,10 @@ iflib_netmap_txsync(struct netmap_kring *kring, int fl
        nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
-       pkt_info_zero(&pi);
-       pi.ipi_segs = txq->ift_segs;
-       pi.ipi_qsidx = kring->ring_id;
        if (nm_i != head) {     /* we have new packets to send */
+               pkt_info_zero(&pi);
+               pi.ipi_segs = txq->ift_segs;
+               pi.ipi_qsidx = kring->ring_id;
                nic_i = netmap_idx_k2n(kring, nm_i);
@@ -1025,11 +1025,24 @@ iflib_netmap_txsync(struct netmap_kring *kring, int fl
         * Second part: reclaim buffers for completed transmissions.
+        *
+        * If there are unclaimed buffers, attempt to reclaim them.
+        * If none are reclaimed, and TX IRQs are not in use, do an initial
+        * minimal delay, then trigger the tx handler which will spin in the
+        * group task queue.
-       if (iflib_tx_credits_update(ctx, txq)) {
-               /* some tx completed, increment avail */
-               nic_i = txq->ift_cidx_processed;
-               kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
+       if (kring->nr_hwtail != nm_prev(head, lim)) {
+               if (iflib_tx_credits_update(ctx, txq)) {
+                       /* some tx completed, increment avail */
+                       nic_i = txq->ift_cidx_processed;
+                       kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, 
nic_i), lim);
+               }
+               else {
+                       if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) {
+                               DELAY(1);
+                       }
+               }
        return (0);
@@ -3702,8 +3715,20 @@ _task_fn_tx(void *context)
        if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
        if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+               /*
+                * If there are no available credits, and TX IRQs are not in use,
+                * re-schedule the task immediately.
+                */
                if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, 
                        netmap_tx_irq(ifp, txq->ift_id);
+               else {
+                       if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) {
+                               struct netmap_kring *kring = 
+                               if (kring->nr_hwtail != nm_prev(kring->rhead, 
kring->nkr_num_slots - 1))
+                                       GROUPTASK_ENQUEUE(&txq->ift_task);
+                       }
+               }
                IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
@@ -5548,6 +5573,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, in
                fn = _task_fn_tx;
                intr_fast = iflib_fast_intr;
                GROUPTASK_INIT(gtask, 0, fn, q);
+               ctx->ifc_flags |= IFC_NETMAP_TX_IRQ;
        case IFLIB_INTR_RX:
                q = &ctx->ifc_rxqs[qid];

Modified: head/sys/net/iflib_private.h
--- head/sys/net/iflib_private.h        Wed May 16 20:44:08 2018        
+++ head/sys/net/iflib_private.h        Wed May 16 21:03:22 2018        
@@ -43,6 +43,8 @@
 #define        IFC_CHECK_HUNG          0x200
 #define        IFC_PSEUDO              0x400
+#define IFC_NETMAP_TX_IRQ      0x80000000
 #define IFLIB_MAX_TX_BYTES             (2*1024*1024)
_______________________________________________ mailing list
To unsubscribe, send any mail to ""

Reply via email to