Author: luigi
Date: Mon Feb 27 19:05:01 2012
New Revision: 232238
URL: http://svn.freebsd.org/changeset/base/232238

Log:
  A bunch of netmap fixes:
  
  USERSPACE:
  1. add support for devices with different number of rx and tx queues;
  
  2. add better support for zero-copy operation, adding an extra field
     to the netmap ring to indicate how many buffers we have already processed
     but not yet released (with help from Eddie Kohler);
  
  3. The two changes above unfortunately require an API change, so while
     at it add a version field and some spares to the ioctl() argument
     to help detect mismatches.
  
  4. update the manual page for the two changes above;
  
  5. update sample applications in tools/tools/netmap
  
  KERNEL:
  
  1. simplify the internal structures moving the global wait queues
     to the 'struct netmap_adapter';
  
  2. simplify the functions that map kring<->nic ring indexes
  
  3. normalize device-specific code, which helps maintenance;
  
  4. start exploring the impact of micro-optimizations (prefetch etc.)
     in the ixgbe driver.
     Using 'legacy' descriptors on the tx ring and prefetching slots gives
     about 20% speedup at 900 MHz. Another 7-10% would come from removing
     the explicit calls to bus_dmamap* in the core (they are effectively
     NOPs in this case, but it takes expensive loads of the per-buffer
     dma maps to figure out that they are all NULL).
  
     Rx performance not investigated.
  
  I am postponing the MFC so I can import a few more improvements
  before merging.

Modified:
  head/share/man/man4/netmap.4
  head/sys/dev/e1000/if_em.c
  head/sys/dev/e1000/if_igb.c
  head/sys/dev/e1000/if_lem.c
  head/sys/dev/ixgbe/ixgbe.c
  head/sys/dev/netmap/if_em_netmap.h
  head/sys/dev/netmap/if_igb_netmap.h
  head/sys/dev/netmap/if_lem_netmap.h
  head/sys/dev/netmap/if_re_netmap.h
  head/sys/dev/netmap/ixgbe_netmap.h
  head/sys/dev/netmap/netmap.c
  head/sys/dev/netmap/netmap_kern.h
  head/sys/dev/re/if_re.c
  head/sys/net/netmap.h
  head/sys/net/netmap_user.h
  head/tools/tools/netmap/bridge.c
  head/tools/tools/netmap/pcap.c
  head/tools/tools/netmap/pkt-gen.c

Modified: head/share/man/man4/netmap.4
==============================================================================
--- head/share/man/man4/netmap.4        Mon Feb 27 18:28:31 2012        (r232237)
+++ head/share/man/man4/netmap.4        Mon Feb 27 19:05:01 2012        (r232238)
@@ -28,7 +28,7 @@
 .\" $FreeBSD$
 .\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
 .\"
-.Dd November 16, 2011
+.Dd February 27, 2012
 .Dt NETMAP 4
 .Os
 .Sh NAME
@@ -123,8 +123,9 @@ one ring pair (numbered N) for packets f
 struct netmap_ring {
     const ssize_t buf_ofs;
     const uint32_t num_slots; /* number of slots in the ring. */
-    uint32_t avail; /* number of usable slots */
-    uint32_t cur; /* 'current' index for the user side */
+    uint32_t avail;           /* number of usable slots */
+    uint32_t cur;             /* 'current' index for the user side */
+    uint32_t reserved;        /* not refilled before current */
 
     const uint16_t nr_buf_size;
     uint16_t flags;
@@ -173,10 +174,14 @@ defined as follows:
 .Bd -literal
 struct nmreq {
         char      nr_name[IFNAMSIZ];
+        uint32_t  nr_version;     /* API version */
+#define NETMAP_API      2         /* current version */
         uint32_t  nr_offset;      /* nifp offset in the shared region */
         uint32_t  nr_memsize;     /* size of the shared region */
-        uint32_t  nr_numdescs;    /* descriptors per queue */
-        uint16_t  nr_numqueues;
+        uint32_t  nr_tx_slots;    /* slots in tx rings */
+        uint32_t  nr_rx_slots;    /* slots in rx rings */
+        uint16_t  nr_tx_rings;    /* number of tx rings */
+        uint16_t  nr_rx_rings;    /* number of rx rings */
         uint16_t  nr_ringid;      /* ring(s) we care about */
 #define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
 #define NETMAP_SW_RING  0x2000    /* we process the sw ring */
@@ -199,8 +204,10 @@ and are:
 returns information about the interface named in nr_name.
 On return, nr_memsize indicates the size of the shared netmap
 memory region (this is device-independent),
-nr_numslots indicates how many buffers are in a ring,
-nr_numrings indicates the number of rings supported by the hardware.
+nr_tx_slots and nr_rx_slots indicate how many buffers are in a
+transmit and receive ring,
+nr_tx_rings and nr_rx_rings indicate the number of transmit
+and receive rings supported by the hardware.
 .Pp
 If the device does not support netmap, the ioctl returns EINVAL.
 .It Dv NIOCREGIF
@@ -266,6 +273,7 @@ struct netmap_request nmr;
 fd = open("/dev/netmap", O_RDWR);
 bzero(&nmr, sizeof(nmr));
 strcpy(nmr.nm_name, "ix0");
+nmr.nm_version = NETMAP_API;
 ioctl(fd, NIOCREG, &nmr);
 p = mmap(0, nmr.memsize, fd);
 nifp = NETMAP_IF(p, nmr.offset);

Modified: head/sys/dev/e1000/if_em.c
==============================================================================
--- head/sys/dev/e1000/if_em.c  Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/e1000/if_em.c  Mon Feb 27 19:05:01 2012        (r232238)
@@ -3296,7 +3296,7 @@ em_setup_transmit_ring(struct tx_ring *t
                }
 #ifdef DEV_NETMAP
                if (slot) {
-                       int si = netmap_tidx_n2k(na, txr->me, i);
+                       int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
                        uint64_t paddr;
                        void *addr;
 
@@ -3759,7 +3759,7 @@ em_txeof(struct tx_ring *txr)
                selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
                EM_TX_UNLOCK(txr);
                EM_CORE_LOCK(adapter);
-               selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+               selwakeuppri(&na->tx_si, PI_NET);
                EM_CORE_UNLOCK(adapter);
                EM_TX_LOCK(txr);
                return (FALSE);
@@ -4051,7 +4051,7 @@ em_setup_receive_ring(struct rx_ring *rx
                rxbuf = &rxr->rx_buffers[j];
 #ifdef DEV_NETMAP
                if (slot) {
-                       int si = netmap_ridx_n2k(na, rxr->me, j);
+                       int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
                        uint64_t paddr;
                        void *addr;
 
@@ -4370,10 +4370,11 @@ em_rxeof(struct rx_ring *rxr, int count,
        if (ifp->if_capenable & IFCAP_NETMAP) {
                struct netmap_adapter *na = NA(ifp);
 
+               na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
                selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
                EM_RX_UNLOCK(rxr);
                EM_CORE_LOCK(adapter);
-               selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+               selwakeuppri(&na->rx_si, PI_NET);
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

Modified: head/sys/dev/e1000/if_igb.c
==============================================================================
--- head/sys/dev/e1000/if_igb.c Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/e1000/if_igb.c Mon Feb 27 19:05:01 2012        (r232238)
@@ -3315,7 +3315,7 @@ igb_setup_transmit_ring(struct tx_ring *
                }
 #ifdef DEV_NETMAP
                if (slot) {
-                       int si = netmap_tidx_n2k(na, txr->me, i);
+                       int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
                        /* no need to set the address */
                        netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
                }
@@ -3693,7 +3693,7 @@ igb_txeof(struct tx_ring *txr)
                selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
                IGB_TX_UNLOCK(txr);
                IGB_CORE_LOCK(adapter);
-               selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+               selwakeuppri(&na->tx_si, PI_NET);
                IGB_CORE_UNLOCK(adapter);
                IGB_TX_LOCK(txr);
                return FALSE;
@@ -4057,7 +4057,7 @@ igb_setup_receive_ring(struct rx_ring *r
 #ifdef DEV_NETMAP
                if (slot) {
                        /* slot sj is mapped to the i-th NIC-ring entry */
-                       int sj = netmap_ridx_n2k(na, rxr->me, j);
+                       int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
                        uint64_t paddr;
                        void *addr;
 
@@ -4554,10 +4554,11 @@ igb_rxeof(struct igb_queue *que, int cou
        if (ifp->if_capenable & IFCAP_NETMAP) {
                struct netmap_adapter *na = NA(ifp);
 
+               na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
                selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
                IGB_RX_UNLOCK(rxr);
                IGB_CORE_LOCK(adapter);
-               selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+               selwakeuppri(&na->rx_si, PI_NET);
                IGB_CORE_UNLOCK(adapter);
                return (0);
        }

Modified: head/sys/dev/e1000/if_lem.c
==============================================================================
--- head/sys/dev/e1000/if_lem.c Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/e1000/if_lem.c Mon Feb 27 19:05:01 2012        (r232238)
@@ -2669,7 +2669,7 @@ lem_setup_transmit_structures(struct ada
 #ifdef DEV_NETMAP
                if (slot) {
                        /* the i-th NIC entry goes to slot si */
-                       int si = netmap_tidx_n2k(na, 0, i);
+                       int si = netmap_idx_n2k(&na->tx_rings[0], i);
                        uint64_t paddr;
                        void *addr;
 
@@ -3243,7 +3243,7 @@ lem_setup_receive_structures(struct adap
 #ifdef DEV_NETMAP
                if (slot) {
                        /* the i-th NIC entry goes to slot si */
-                       int si = netmap_ridx_n2k(na, 0, i);
+                       int si = netmap_idx_n2k(&na->rx_rings[0], i);
                        uint64_t paddr;
                        void *addr;
 
@@ -3475,7 +3475,9 @@ lem_rxeof(struct adapter *adapter, int c
 
 #ifdef DEV_NETMAP
        if (ifp->if_capenable & IFCAP_NETMAP) {
-               selwakeuppri(&NA(ifp)->rx_rings[0].si, PI_NET);
+               struct netmap_adapter *na = NA(ifp);
+               na->rx_rings[0].nr_kflags |= NKR_PENDINTR;
+               selwakeuppri(&na->rx_rings[0].si, PI_NET);
                EM_RX_UNLOCK(adapter);
                return (0);
        }

Modified: head/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- head/sys/dev/ixgbe/ixgbe.c  Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/ixgbe/ixgbe.c  Mon Feb 27 19:05:01 2012        (r232238)
@@ -2970,10 +2970,10 @@ ixgbe_setup_transmit_ring(struct tx_ring
                 * kring->nkr_hwofs positions "ahead" wrt the
                 * corresponding slot in the NIC ring. In some drivers
                 * (not here) nkr_hwofs can be negative. Function
-                * netmap_tidx_n2k() handles wraparounds properly.
+                * netmap_idx_n2k() handles wraparounds properly.
                 */
                if (slot) {
-                       int si = netmap_tidx_n2k(na, txr->me, i);
+                       int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
                        netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
                }
 #endif /* DEV_NETMAP */
@@ -3491,7 +3491,7 @@ ixgbe_txeof(struct tx_ring *txr)
                        selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
                        IXGBE_TX_UNLOCK(txr);
                        IXGBE_CORE_LOCK(adapter);
-                       selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+                       selwakeuppri(&na->tx_si, PI_NET);
                        IXGBE_CORE_UNLOCK(adapter);
                        IXGBE_TX_LOCK(txr);
                }
@@ -3922,7 +3922,7 @@ ixgbe_setup_receive_ring(struct rx_ring 
                 * an mbuf, so end the block with a continue;
                 */
                if (slot) {
-                       int sj = netmap_ridx_n2k(na, rxr->me, j);
+                       int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
                        uint64_t paddr;
                        void *addr;
 
@@ -4376,7 +4376,7 @@ ixgbe_rxeof(struct ix_queue *que, int co
                selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
                IXGBE_RX_UNLOCK(rxr);
                IXGBE_CORE_LOCK(adapter);
-               selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+               selwakeuppri(&na->rx_si, PI_NET);
                IXGBE_CORE_UNLOCK(adapter);
                return (FALSE);
        }

Modified: head/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- head/sys/dev/netmap/if_em_netmap.h  Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/netmap/if_em_netmap.h  Mon Feb 27 19:05:01 2012        (r232238)
@@ -25,45 +25,23 @@
 
 /*
  * $FreeBSD$
- * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ * $Id: if_em_netmap.h 10627 2012-02-23 19:37:15Z luigi $
  *
- * netmap support for if_em.c
+ * netmap support for em.
  *
- * For structure and details on the individual functions please see
- * ixgbe_netmap.h
+ * For more details on netmap support please see ixgbe_netmap.h
  */
 
+
 #include <net/netmap.h>
 #include <sys/selinfo.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
+
 static void    em_netmap_block_tasks(struct adapter *);
 static void    em_netmap_unblock_tasks(struct adapter *);
-static int     em_netmap_reg(struct ifnet *, int onoff);
-static int     em_netmap_txsync(struct ifnet *, u_int, int);
-static int     em_netmap_rxsync(struct ifnet *, u_int, int);
-static void    em_netmap_lock_wrapper(struct ifnet *, int, u_int);
-
-
-static void
-em_netmap_attach(struct adapter *adapter)
-{
-       struct netmap_adapter na;
-
-       bzero(&na, sizeof(na));
-
-       na.ifp = adapter->ifp;
-       na.separate_locks = 1;
-       na.num_tx_desc = adapter->num_tx_desc;
-       na.num_rx_desc = adapter->num_rx_desc;
-       na.nm_txsync = em_netmap_txsync;
-       na.nm_rxsync = em_netmap_rxsync;
-       na.nm_lock = em_netmap_lock_wrapper;
-       na.nm_register = em_netmap_reg;
-       netmap_attach(&na, adapter->num_queues);
-}
 
 
 static void
@@ -137,7 +115,7 @@ em_netmap_unblock_tasks(struct adapter *
 
 
 /*
- * register-unregister routine
+ * Register/unregister routine
  */
 static int
 em_netmap_reg(struct ifnet *ifp, int onoff)
@@ -180,17 +158,17 @@ fail:
 
 
 /*
- * Reconcile hardware and user view of the transmit ring.
+ * Reconcile kernel and user view of the transmit ring.
  */
 static int
 em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
 {
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring *txr = &adapter->tx_rings[ring_nr];
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->tx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+       u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
        /* generate an interrupt approximately every half ring */
        int report_frequency = kring->nkr_num_slots >> 1;
@@ -204,16 +182,17 @@ em_netmap_txsync(struct ifnet *ifp, u_in
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
                        BUS_DMASYNC_POSTREAD);
 
-       /* check for new packets to send.
-        * j indexes the netmap ring, l indexes the nic ring, and
-        *      j = kring->nr_hwcur, l = E1000_TDT (not tracked),
-        *      j == (l + kring->nkr_hwofs) % ring_size
+       /*
+        * Process new packets to send. j is the current index in the
+        * netmap ring, l is the corresponding index in the NIC ring.
         */
        j = kring->nr_hwcur;
-       if (j != k) {   /* we have packets to send */
-               l = netmap_tidx_k2n(na, ring_nr, j);
+       if (j != k) {   /* we have new packets to send */
+               l = netmap_idx_k2n(kring, j);
                for (n = 0; j != k; n++) {
+                       /* slot is the current slot in the netmap ring */
                        struct netmap_slot *slot = &ring->slot[j];
+                       /* curr is the current slot in the nic ring */
                        struct e1000_tx_desc *curr = &txr->tx_base[l];
                        struct em_buffer *txbuf = &txr->tx_buffers[l];
                        int flags = ((slot->flags & NS_REPORT) ||
@@ -221,7 +200,7 @@ em_netmap_txsync(struct ifnet *ifp, u_in
                                        E1000_TXD_CMD_RS : 0;
                        uint64_t paddr;
                        void *addr = PNMB(slot, &paddr);
-                       int len = slot->len;
+                       u_int len = slot->len;
 
                        if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
                                if (do_lock)
@@ -230,25 +209,21 @@ em_netmap_txsync(struct ifnet *ifp, u_in
                        }
 
                        slot->flags &= ~NS_REPORT;
-                       curr->upper.data = 0;
-                       curr->lower.data = 
-                           htole32(adapter->txd_cmd | len |
-                               (E1000_TXD_CMD_EOP | flags) );
                        if (slot->flags & NS_BUF_CHANGED) {
                                curr->buffer_addr = htole64(paddr);
                                /* buffer has changed, reload map */
                                netmap_reload_map(txr->txtag, txbuf->map, addr);
                                slot->flags &= ~NS_BUF_CHANGED;
                        }
-
+                       curr->upper.data = 0;
+                       curr->lower.data = htole32(adapter->txd_cmd | len |
+                               (E1000_TXD_CMD_EOP | flags) );
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                                BUS_DMASYNC_PREWRITE);
                        j = (j == lim) ? 0 : j + 1;
                        l = (l == lim) ? 0 : l + 1;
                }
-               kring->nr_hwcur = k;
-
-               /* decrease avail by number of sent packets */
+               kring->nr_hwcur = k; /* the saved ring->cur */
                kring->nr_hwavail -= n;
 
                bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
@@ -275,7 +250,7 @@ em_netmap_txsync(struct ifnet *ifp, u_in
                        kring->nr_hwavail += delta;
                }
        }
-       /* update avail to what the hardware knows */
+       /* update avail to what the kernel knows */
        ring->avail = kring->nr_hwavail;
 
        if (do_lock)
@@ -292,10 +267,12 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
 {
        struct adapter *adapter = ifp->if_softc;
        struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->rx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n, lim = kring->nkr_num_slots - 1;
+       u_int j, l, n, lim = kring->nkr_num_slots - 1;
+       int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
+       u_int k = ring->cur, resvd = ring->reserved;
 
        k = ring->cur;
        if (k > lim)
@@ -308,37 +285,45 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
                        BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
-       /* import newly received packets into the netmap ring.
-        * j is an index in the netmap ring, l in the NIC ring, and
-        *      j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
-        *      l = rxr->next_to_check;
-        * and
-        *      j == (l + kring->nkr_hwofs) % ring_size
+       /*
+        * Import newly received packets into the netmap ring.
+        * j is an index in the netmap ring, l in the NIC ring.
         */
        l = rxr->next_to_check;
-       j = netmap_ridx_n2k(na, ring_nr, l);
-       for (n = 0; ; n++) {
-               struct e1000_rx_desc *curr = &rxr->rx_base[l];
-
-               if ((curr->status & E1000_RXD_STAT_DD) == 0)
-                       break;
-               ring->slot[j].len = le16toh(curr->length);
-               bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
-                       BUS_DMASYNC_POSTREAD);
-               j = (j == lim) ? 0 : j + 1;
-               /* make sure next_to_refresh follows next_to_check */
-               rxr->next_to_refresh = l;       // XXX
-               l = (l == lim) ? 0 : l + 1;
-       }
-       if (n) {
-               rxr->next_to_check = l;
-               kring->nr_hwavail += n;
+       j = netmap_idx_n2k(kring, l);
+       if (netmap_no_pendintr || force_update) {
+               for (n = 0; ; n++) {
+                       struct e1000_rx_desc *curr = &rxr->rx_base[l];
+                       uint32_t staterr = le32toh(curr->status);
+
+                       if ((staterr & E1000_RXD_STAT_DD) == 0)
+                               break;
+                       ring->slot[j].len = le16toh(curr->length);
+                       bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
+                               BUS_DMASYNC_POSTREAD);
+                       j = (j == lim) ? 0 : j + 1;
+                       /* make sure next_to_refresh follows next_to_check */
+                       rxr->next_to_refresh = l;       // XXX
+                       l = (l == lim) ? 0 : l + 1;
+               }
+               if (n) { /* update the state variables */
+                       rxr->next_to_check = l;
+                       kring->nr_hwavail += n;
+               }
+               kring->nr_kflags &= ~NKR_PENDINTR;
        }
 
-       /* skip past packets that userspace has already processed */
+       /* skip past packets that userspace has released */
        j = kring->nr_hwcur;    /* netmap ring index */
-       if (j != k) { /* userspace has read some packets. */
-               l = netmap_ridx_k2n(na, ring_nr, j); /* NIC ring index */
+       if (resvd > 0) {
+               if (resvd + ring->avail >= lim + 1) {
+                       D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
+                       ring->reserved = resvd = 0; // XXX panic...
+               }
+               k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
+       }
+        if (j != k) { /* userspace has released some packets. */
+               l = netmap_idx_k2n(kring, j); /* NIC ring index */
                for (n = 0; j != k; n++) {
                        struct netmap_slot *slot = &ring->slot[j];
                        struct e1000_rx_desc *curr = &rxr->rx_base[l];
@@ -352,17 +337,15 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
                                return netmap_ring_reinit(kring);
                        }
 
-                       curr->status = 0;
                        if (slot->flags & NS_BUF_CHANGED) {
                                curr->buffer_addr = htole64(paddr);
                                /* buffer has changed, reload map */
                                netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
                                slot->flags &= ~NS_BUF_CHANGED;
                        }
-
+                       curr->status = 0;
                        bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                            BUS_DMASYNC_PREREAD);
-
                        j = (j == lim) ? 0 : j + 1;
                        l = (l == lim) ? 0 : l + 1;
                }
@@ -378,9 +361,29 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
                E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
        }
        /* tell userspace that there are new packets */
-       ring->avail = kring->nr_hwavail ;
+       ring->avail = kring->nr_hwavail - resvd;
        if (do_lock)
                EM_RX_UNLOCK(rxr);
        return 0;
 }
+
+
+static void
+em_netmap_attach(struct adapter *adapter)
+{
+       struct netmap_adapter na;
+
+       bzero(&na, sizeof(na));
+
+       na.ifp = adapter->ifp;
+       na.separate_locks = 1;
+       na.num_tx_desc = adapter->num_tx_desc;
+       na.num_rx_desc = adapter->num_rx_desc;
+       na.nm_txsync = em_netmap_txsync;
+       na.nm_rxsync = em_netmap_rxsync;
+       na.nm_lock = em_netmap_lock_wrapper;
+       na.nm_register = em_netmap_reg;
+       netmap_attach(&na, adapter->num_queues);
+}
+
 /* end of file */

Modified: head/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- head/sys/dev/netmap/if_igb_netmap.h Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/netmap/if_igb_netmap.h Mon Feb 27 19:05:01 2012        (r232238)
@@ -25,41 +25,19 @@
 
 /*
  * $FreeBSD$
- * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ * $Id: if_igb_netmap.h 10627 2012-02-23 19:37:15Z luigi $
  *
- * netmap modifications for igb contributed by Ahmed Kooli
+ * Netmap support for igb, partly contributed by Ahmed Kooli
+ * For details on netmap support please see ixgbe_netmap.h
  */
 
+
 #include <net/netmap.h>
 #include <sys/selinfo.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
-static int     igb_netmap_reg(struct ifnet *, int onoff);
-static int     igb_netmap_txsync(struct ifnet *, u_int, int);
-static int     igb_netmap_rxsync(struct ifnet *, u_int, int);
-static void    igb_netmap_lock_wrapper(struct ifnet *, int, u_int);
-
-
-static void
-igb_netmap_attach(struct adapter *adapter)
-{
-       struct netmap_adapter na;
-
-       bzero(&na, sizeof(na));
-
-       na.ifp = adapter->ifp;
-       na.separate_locks = 1;
-       na.num_tx_desc = adapter->num_tx_desc;
-       na.num_rx_desc = adapter->num_rx_desc;
-       na.nm_txsync = igb_netmap_txsync;
-       na.nm_rxsync = igb_netmap_rxsync;
-       na.nm_lock = igb_netmap_lock_wrapper;
-       na.nm_register = igb_netmap_reg;
-       netmap_attach(&na, adapter->num_queues);
-}      
-
 
 /*
  * wrapper to export locks to the generic code
@@ -134,17 +112,17 @@ fail:
 
 
 /*
- * Reconcile hardware and user view of the transmit ring.
+ * Reconcile kernel and user view of the transmit ring.
  */
 static int
 igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
 {
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring *txr = &adapter->tx_rings[ring_nr];
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->tx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+       u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
        /* generate an interrupt approximately every half ring */
        int report_frequency = kring->nkr_num_slots >> 1;
@@ -164,14 +142,16 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
         *      j == (l + kring->nkr_hwofs) % ring_size
         */
        j = kring->nr_hwcur;
-       if (j != k) {   /* we have packets to send */
+       if (j != k) {   /* we have new packets to send */
                /* 82575 needs the queue index added */
                u32 olinfo_status =
                    (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
 
-               l = netmap_tidx_k2n(na, ring_nr, j);
+               l = netmap_idx_k2n(kring, j);
                for (n = 0; j != k; n++) {
+                       /* slot is the current slot in the netmap ring */
                        struct netmap_slot *slot = &ring->slot[j];
+                       /* curr is the current slot in the nic ring */
                        union e1000_adv_tx_desc *curr =
                            (union e1000_adv_tx_desc *)&txr->tx_base[l];
                        struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
@@ -180,7 +160,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
                                        E1000_ADVTXD_DCMD_RS : 0;
                        uint64_t paddr;
                        void *addr = PNMB(slot, &paddr);
-                       int len = slot->len;
+                       u_int len = slot->len;
 
                        if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
                                if (do_lock)
@@ -189,8 +169,13 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
                        }
 
                        slot->flags &= ~NS_REPORT;
-                       // XXX set the address unconditionally
+                       if (slot->flags & NS_BUF_CHANGED) {
+                               /* buffer has changed, reload map */
+                               netmap_reload_map(txr->txtag, txbuf->map, addr);
+                               slot->flags &= ~NS_BUF_CHANGED;
+                       }
                        curr->read.buffer_addr = htole64(paddr);
+                       // XXX check olinfo and cmd_type_len
                        curr->read.olinfo_status =
                            htole32(olinfo_status |
                                (len<< E1000_ADVTXD_PAYLEN_SHIFT));
@@ -199,20 +184,13 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
                                    E1000_ADVTXD_DCMD_IFCS |
                                    E1000_ADVTXD_DCMD_DEXT |
                                    E1000_ADVTXD_DCMD_EOP | flags);
-                       if (slot->flags & NS_BUF_CHANGED) {
-                               /* buffer has changed, reload map */
-                               netmap_reload_map(txr->txtag, txbuf->map, addr);
-                               slot->flags &= ~NS_BUF_CHANGED;
-                       }
 
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                                BUS_DMASYNC_PREWRITE);
                        j = (j == lim) ? 0 : j + 1;
                        l = (l == lim) ? 0 : l + 1;
                }
-               kring->nr_hwcur = k;
-
-               /* decrease avail by number of sent packets */
+               kring->nr_hwcur = k; /* the saved ring->cur */
                kring->nr_hwavail -= n;
 
                /* Set the watchdog XXX ? */
@@ -243,7 +221,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
                        kring->nr_hwavail += delta;
                }
        }
-       /* update avail to what the hardware knows */
+       /* update avail to what the kernel knows */
        ring->avail = kring->nr_hwavail;
 
        if (do_lock)
@@ -260,10 +238,12 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
 {
        struct adapter *adapter = ifp->if_softc;
        struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->rx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n, lim = kring->nkr_num_slots - 1;
+       u_int j, l, n, lim = kring->nkr_num_slots - 1;
+       int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
+       u_int k = ring->cur, resvd = ring->reserved;
 
        k = ring->cur;
        if (k > lim)
@@ -276,36 +256,43 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
-       /* import newly received packets into the netmap ring.
-        * j is an index in the netmap ring, l in the NIC ring, and
-        *      j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
-        *      l = rxr->next_to_check;
-        * and
-        *      j == (l + kring->nkr_hwofs) % ring_size
+       /*
+        * import newly received packets into the netmap ring.
+        * j is an index in the netmap ring, l in the NIC ring.
         */
        l = rxr->next_to_check;
-       j = netmap_ridx_n2k(na, ring_nr, l);
-       for (n = 0; ; n++) {
-               union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
-               uint32_t staterr = le32toh(curr->wb.upper.status_error);
-
-               if ((staterr & E1000_RXD_STAT_DD) == 0)
-                       break;
-               ring->slot[j].len = le16toh(curr->wb.upper.length);
-               bus_dmamap_sync(rxr->ptag,
-                       rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
-               j = (j == lim) ? 0 : j + 1;
-               l = (l == lim) ? 0 : l + 1;
-       }
-       if (n) {
-               rxr->next_to_check = l;
-               kring->nr_hwavail += n;
+       j = netmap_idx_n2k(kring, l);
+       if (netmap_no_pendintr || force_update) {
+               for (n = 0; ; n++) {
+                       union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
+                       uint32_t staterr = le32toh(curr->wb.upper.status_error);
+
+                       if ((staterr & E1000_RXD_STAT_DD) == 0)
+                               break;
+                       ring->slot[j].len = le16toh(curr->wb.upper.length);
+                       bus_dmamap_sync(rxr->ptag,
+                               rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
+                       j = (j == lim) ? 0 : j + 1;
+                       l = (l == lim) ? 0 : l + 1;
+               }
+               if (n) { /* update the state variables */
+                       rxr->next_to_check = l;
+                       kring->nr_hwavail += n;
+               }
+               kring->nr_kflags &= ~NKR_PENDINTR;
        }
 
-       /* skip past packets that userspace has already processed */
-       j = kring->nr_hwcur;
-       if (j != k) { /* userspace has read some packets. */
-               l = netmap_ridx_k2n(na, ring_nr, j);
+       /* skip past packets that userspace has released */
+        j = kring->nr_hwcur;    /* netmap ring index */
+       if (resvd > 0) {
+               if (resvd + ring->avail >= lim + 1) {
+                       D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
+                       ring->reserved = resvd = 0; // XXX panic...
+               }
+               k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
+       }
+       if (j != k) { /* userspace has released some packets. */
+               l = netmap_idx_k2n(kring, j);
                for (n = 0; j != k; n++) {
                        struct netmap_slot *slot = ring->slot + j;
                        union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
@@ -319,16 +306,14 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
                                return netmap_ring_reinit(kring);
                        }
 
-                       curr->wb.upper.status_error = 0;
-                       curr->read.pkt_addr = htole64(paddr);
                        if (slot->flags & NS_BUF_CHANGED) {
                                netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
                                slot->flags &= ~NS_BUF_CHANGED;
                        }
-
+                       curr->read.pkt_addr = htole64(paddr);
+                       curr->wb.upper.status_error = 0;
                        bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
                                BUS_DMASYNC_PREREAD);
-
                        j = (j == lim) ? 0 : j + 1;
                        l = (l == lim) ? 0 : l + 1;
                }
@@ -344,9 +329,28 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
                E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
        }
        /* tell userspace that there are new packets */
-       ring->avail = kring->nr_hwavail ;
+       ring->avail = kring->nr_hwavail - resvd;
        if (do_lock)
                IGB_RX_UNLOCK(rxr);
        return 0;
 }
+
+
+static void
+igb_netmap_attach(struct adapter *adapter)
+{
+       struct netmap_adapter na;
+
+       bzero(&na, sizeof(na));
+
+       na.ifp = adapter->ifp;
+       na.separate_locks = 1;
+       na.num_tx_desc = adapter->num_tx_desc;
+       na.num_rx_desc = adapter->num_rx_desc;
+       na.nm_txsync = igb_netmap_txsync;
+       na.nm_rxsync = igb_netmap_rxsync;
+       na.nm_lock = igb_netmap_lock_wrapper;
+       na.nm_register = igb_netmap_reg;
+       netmap_attach(&na, adapter->num_queues);
+}      
 /* end of file */

Modified: head/sys/dev/netmap/if_lem_netmap.h
==============================================================================
--- head/sys/dev/netmap/if_lem_netmap.h Mon Feb 27 18:28:31 2012        (r232237)
+++ head/sys/dev/netmap/if_lem_netmap.h Mon Feb 27 19:05:01 2012        (r232238)
@@ -23,14 +23,14 @@
  * SUCH DAMAGE.
  */
 
+
 /*
  * $FreeBSD$
- * $Id: if_lem_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ * $Id: if_lem_netmap.h 10627 2012-02-23 19:37:15Z luigi $
  *
- * netmap support for if_lem.c
+ * netmap support for "lem"
  *
- * For structure and details on the individual functions please see
- * ixgbe_netmap.h
+ * For details on netmap support please see ixgbe_netmap.h
  */
 
 #include <net/netmap.h>
@@ -39,30 +39,6 @@
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
-static int     lem_netmap_reg(struct ifnet *, int onoff);
-static int     lem_netmap_txsync(struct ifnet *, u_int, int);
-static int     lem_netmap_rxsync(struct ifnet *, u_int, int);
-static void    lem_netmap_lock_wrapper(struct ifnet *, int, u_int);
-
-
-static void
-lem_netmap_attach(struct adapter *adapter)
-{
-       struct netmap_adapter na;
-
-       bzero(&na, sizeof(na));
-
-       na.ifp = adapter->ifp;
-       na.separate_locks = 1;
-       na.num_tx_desc = adapter->num_tx_desc;
-       na.num_rx_desc = adapter->num_rx_desc;
-       na.nm_txsync = lem_netmap_txsync;
-       na.nm_rxsync = lem_netmap_rxsync;
-       na.nm_lock = lem_netmap_lock_wrapper;
-       na.nm_register = lem_netmap_reg;
-       netmap_attach(&na, 1);
-}
-
 
 static void
 lem_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int ringid)
@@ -94,7 +70,7 @@ lem_netmap_lock_wrapper(struct ifnet *if
 
 
 /*
- * register-unregister routine
+ * Register/unregister
  */
 static int
 lem_netmap_reg(struct ifnet *ifp, int onoff)
@@ -104,7 +80,7 @@ lem_netmap_reg(struct ifnet *ifp, int on
        int error = 0;
 
        if (na == NULL)
-               return EINVAL;  /* no netmap support here */
+               return EINVAL;
 
        lem_disable_intr(adapter);
 
@@ -144,20 +120,21 @@ fail:
 
 
 /*
- * Reconcile hardware and user view of the transmit ring.
+ * Reconcile kernel and user view of the transmit ring.
  */
 static int
 lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
 {
        struct adapter *adapter = ifp->if_softc;
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->tx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+       u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
        /* generate an interrupt approximately every half ring */
        int report_frequency = kring->nkr_num_slots >> 1;
 
+       /* take a copy of ring->cur now, and never read it again */
        k = ring->cur;
        if (k > lim)
                return netmap_ring_reinit(kring);
@@ -166,17 +143,17 @@ lem_netmap_txsync(struct ifnet *ifp, u_i
                EM_TX_LOCK(adapter);
        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
                        BUS_DMASYNC_POSTREAD);
-
-       /* check for new packets to send.
-        * j indexes the netmap ring, l indexes the nic ring, and
-        *      j = kring->nr_hwcur, l = E1000_TDT (not tracked),
-        *      j == (l + kring->nkr_hwofs) % ring_size
+       /*
+        * Process new packets to send. j is the current index in the
+        * netmap ring, l is the corresponding index in the NIC ring.
         */
        j = kring->nr_hwcur;
-       if (j != k) {   /* we have packets to send */
-               l = netmap_tidx_k2n(na, ring_nr, j);
+       if (j != k) {   /* we have new packets to send */
+               l = netmap_idx_k2n(kring, j);
                for (n = 0; j != k; n++) {
+                       /* slot is the current slot in the netmap ring */
                        struct netmap_slot *slot = &ring->slot[j];
+                       /* curr is the current slot in the nic ring */
                        struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
                        struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
                        int flags = ((slot->flags & NS_REPORT) ||
@@ -184,7 +161,7 @@ lem_netmap_txsync(struct ifnet *ifp, u_i
                                        E1000_TXD_CMD_RS : 0;
                        uint64_t paddr;
                        void *addr = PNMB(slot, &paddr);
-                       int len = slot->len;
+                       u_int len = slot->len;
 
                        if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
                                if (do_lock)
@@ -193,25 +170,23 @@ lem_netmap_txsync(struct ifnet *ifp, u_i
                        }
 
                        slot->flags &= ~NS_REPORT;
-                       curr->upper.data = 0;
-                       curr->lower.data =
-                           htole32( adapter->txd_cmd | len |
-                               (E1000_TXD_CMD_EOP | flags) );
                        if (slot->flags & NS_BUF_CHANGED) {
-                               curr->buffer_addr = htole64(paddr);
                                /* buffer has changed, reload map */
                                netmap_reload_map(adapter->txtag, txbuf->map, addr);
+                               curr->buffer_addr = htole64(paddr);
                                slot->flags &= ~NS_BUF_CHANGED;
                        }
+                       curr->upper.data = 0;
+                       curr->lower.data =
+                           htole32( adapter->txd_cmd | len |
+                               (E1000_TXD_CMD_EOP | flags) );
 
                        bus_dmamap_sync(adapter->txtag, txbuf->map,
                            BUS_DMASYNC_PREWRITE);
                        j = (j == lim) ? 0 : j + 1;
                        l = (l == lim) ? 0 : l + 1;
                }
-               kring->nr_hwcur = k;
-
-               /* decrease avail by number of sent packets */
+               kring->nr_hwcur = k; /* the saved ring->cur */
                kring->nr_hwavail -= n;
 
                bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
@@ -231,14 +206,14 @@ lem_netmap_txsync(struct ifnet *ifp, u_i
                }
                delta = l - adapter->next_tx_to_clean;
                if (delta) {
-                       /* some completed, increment hwavail. */
+                       /* some tx completed, increment hwavail. */
                        if (delta < 0)
                                delta += kring->nkr_num_slots;
                        adapter->next_tx_to_clean = l;
                        kring->nr_hwavail += delta;
                }
        }
-       /* update avail to what the hardware knows */
+       /* update avail to what the kernel knows */
        ring->avail = kring->nr_hwavail;
 
        if (do_lock)
@@ -254,12 +229,13 @@ static int
 lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
 {
        struct adapter *adapter = ifp->if_softc;
-       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_adapter *na = NA(ifp);
        struct netmap_kring *kring = &na->rx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
-       int j, k, l, n, lim = kring->nkr_num_slots - 1;
+       int j, l, n, lim = kring->nkr_num_slots - 1;
+       int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
+       u_int k = ring->cur, resvd = ring->reserved;
 
-       k = ring->cur;
        if (k > lim)
                return netmap_ring_reinit(kring);
 
@@ -270,42 +246,50 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
        bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
                        BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
-       /* import newly received packets into the netmap ring
-        * j is an index in the netmap ring, l in the NIC ring, and
-        *      j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
-        *      l = rxr->next_to_check;
-        * and
-        *      j == (l + kring->nkr_hwofs) % ring_size
+       /*
+        * Import newly received packets into the netmap ring.
+        * j is an index in the netmap ring, l in the NIC ring.
         */
        l = adapter->next_rx_desc_to_check;
-       j = netmap_ridx_n2k(na, ring_nr, l);
-       for (n = 0; ; n++) {
-               struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to