Author: luigi
Date: Tue Feb 14 22:49:34 2012
New Revision: 231717
URL: http://svn.freebsd.org/changeset/base/231717

Log:
  MFC: bring in the core netmap code (disconnected
  from the build). As usual, device driver patches
  will come separately.

Added:
  stable/8/share/man/man4/netmap.4   (contents, props changed)
  stable/8/sys/dev/netmap/
  stable/8/sys/dev/netmap/if_em_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_igb_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_lem_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_re_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/ixgbe_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/netmap.c   (contents, props changed)
  stable/8/sys/dev/netmap/netmap_kern.h   (contents, props changed)
  stable/8/sys/net/netmap.h   (contents, props changed)
  stable/8/sys/net/netmap_user.h   (contents, props changed)
  stable/8/tools/tools/netmap/
  stable/8/tools/tools/netmap/Makefile   (contents, props changed)
  stable/8/tools/tools/netmap/README   (contents, props changed)
  stable/8/tools/tools/netmap/bridge.c   (contents, props changed)
  stable/8/tools/tools/netmap/click-test.cfg   (contents, props changed)
  stable/8/tools/tools/netmap/pcap.c   (contents, props changed)
  stable/8/tools/tools/netmap/pkt-gen.c   (contents, props changed)
Modified:
  stable/8/share/man/man4/Makefile
  stable/8/sys/conf/NOTES
  stable/8/sys/conf/files
  stable/8/sys/conf/options

Modified: stable/8/share/man/man4/Makefile
==============================================================================
--- stable/8/share/man/man4/Makefile    Tue Feb 14 22:27:43 2012        
(r231716)
+++ stable/8/share/man/man4/Makefile    Tue Feb 14 22:49:34 2012        
(r231717)
@@ -246,6 +246,7 @@ MAN=        aac.4 \
        net80211.4 \
        netgraph.4 \
        netintro.4 \
+       netmap.4 \
        ${_nfe.4} \
        ${_nfsmb.4} \
        ng_async.4 \

Added: stable/8/share/man/man4/netmap.4
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ stable/8/share/man/man4/netmap.4    Tue Feb 14 22:49:34 2012        
(r231717)
@@ -0,0 +1,299 @@
+.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\" 
+.\" This document is derived in part from the enet man page (enet.4)
+.\" distributed with 4.3BSD Unix.
+.\"
+.\" $FreeBSD$
+.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: 
stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
+.\"
+.Dd November 16, 2011
+.Dt NETMAP 4
+.Os
+.Sh NAME
+.Nm netmap
+.Nd a framework for fast packet I/O
+.Sh SYNOPSIS
+.Cd device netmap
+.Sh DESCRIPTION
+.Nm
+is a framework for fast and safe access to network devices
+(reaching 14.88 Mpps at less than 1 GHz).
+.Nm
+uses memory mapped buffers and metadata
+(buffer indexes and lengths) to communicate with the kernel,
+which is in charge of validating information through 
+.Pa ioctl()
+and
+.Pa select()/poll().
+.Nm
+can exploit the parallelism in multiqueue devices and
+multicore systems.
+.Pp
+.Pp
+.Nm
+requires explicit support in device drivers.
+For a list of supported devices, see the end of this manual page.
+.Sh OPERATION
+.Nm
+clients must first
+.Pa open("/dev/netmap") ,
+and then issue an
+.Pa ioctl(...,NIOCREGIF,...)
+to bind the file descriptor to a network device.
+.Pp
+When a device is put in
+.Nm
+mode, its data path is disconnected from the host stack.
+The processes owning the file descriptor 
+can exchange packets with the device, or with the host stack,
+through an mmapped memory region that contains pre-allocated
+buffers and metadata.
+.Pp
+Non blocking I/O is done with special
+.Pa ioctl()'s ,
+whereas the file descriptor can be passed to
+.Pa select()/poll()
+to be notified about incoming packets or available transmit buffers.
+.Ss Data structures
+All data structures for all devices in
+.Nm
+mode are in a memory
+region shared by the kernel and all processes
+who open
+.Pa /dev/netmap
+(NOTE: visibility may be restricted in future implementations).
+All references between the shared data structure
+are relative (offsets or indexes). Some macros help converting
+them into actual pointers.
+.Pp
+The data structures in shared memory are the following:
+.Pp
+.Bl -tag -width XXX
+.It Dv struct netmap_if (one per interface)
+indicates the number of rings supported by an interface, their
+sizes, and the offsets of the
+.Pa netmap_rings
+associated to the interface.
+The offset of a
+.Pa struct netmap_if
+in the shared memory region is indicated by the
+.Pa nr_offset
+field in the structure returned by the
+.Pa NIOCREGIF
+(see below).
+.Bd -literal
+struct netmap_if {
+    char ni_name[IFNAMSIZ]; /* name of the interface. */
+    const u_int ni_num_queues; /* number of hw ring pairs */
+    const ssize_t   ring_ofs[]; /* offset of tx and rx rings */
+};
+.Ed
+.It Dv struct netmap_ring (one per ring)
+contains the index of the current read or write slot (cur),
+the number of slots available for reception or transmission (avail),
+and an array of
+.Pa slots
+describing the buffers.
+There is one ring pair for each of the N hardware ring pairs
+supported by the card (numbered 0..N-1), plus
+one ring pair (numbered N) for packets from/to the host stack.
+.Bd -literal
+struct netmap_ring {
+    const ssize_t buf_ofs;
+    const uint32_t num_slots; /* number of slots in the ring. */
+    uint32_t avail; /* number of usable slots */
+    uint32_t cur; /* 'current' index for the user side */
+
+    const uint16_t nr_buf_size;
+    uint16_t flags;
+    struct netmap_slot slot[0]; /* array of slots. */
+};
+.Ed
+.It Dv struct netmap_slot (one per packet)
+contains the metadata for a packet: a buffer index (buf_idx),
+a buffer length (len), and some flags.
+.Bd -literal
+struct netmap_slot {
+    uint32_t buf_idx; /* buffer index */
+    uint16_t len;   /* packet length */
+    uint16_t flags; /* buf changed, etc. */
+#define NS_BUF_CHANGED  0x0001  /* must resync, buffer changed */
+#define NS_REPORT       0x0002  /* tell hw to report results
+                                 * e.g. by generating an interrupt
+                                 */
+};
+.Ed
+.It Dv packet buffers
+are fixed size (approximately 2k) buffers allocated by the kernel
+that contain packet data. Buffer addresses are computed through
+macros.
+.El
+.Pp
+Some macros support the access to objects in the shared memory
+region. In particular:
+.Bd -literal
+struct netmap_if *nifp;
+struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
+struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
+int i = txring->slot[txring->cur].buf_idx;
+char *buf = NETMAP_BUF(txring, i);
+.Ed
+.Ss IOCTLS
+.Pp
+.Nm
+supports some ioctl() to synchronize the state of the rings
+between the kernel and the user processes, plus some
+to query and configure the interface.
+The former do not require any argument, whereas the latter
+use a
+.Pa struct nmreq
+defined as follows:
+.Bd -literal
+struct nmreq {
+        char      nr_name[IFNAMSIZ];
+        uint32_t  nr_offset;      /* nifp offset in the shared region */
+        uint32_t  nr_memsize;     /* size of the shared region */
+        uint32_t  nr_numdescs;    /* descriptors per queue */
+        uint16_t  nr_numqueues;
+        uint16_t  nr_ringid;      /* ring(s) we care about */
+#define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
+#define NETMAP_SW_RING  0x2000    /* we process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000  /* no gratuitous txsync on poll */
+#define NETMAP_RING_MASK 0xfff    /* the actual ring number */
+};
+
+.Ed
+A device descriptor obtained through
+.Pa /dev/netmap
+also supports the ioctl supported by network devices.
+.Pp
+The netmap-specific
+.Xr ioctl 2
+command codes below are defined in
+.In net/netmap.h
+and are:
+.Bl -tag -width XXXX
+.It Dv NIOCGINFO
+returns information about the interface named in nr_name.
+On return, nr_memsize indicates the size of the shared netmap
+memory region (this is device-independent),
+nr_numdescs indicates how many buffers are in a ring,
+nr_numqueues indicates the number of rings supported by the hardware.
+.Pp
+If the device does not support netmap, the ioctl returns EINVAL.
+.It Dv NIOCREGIF
+puts the interface named in nr_name into netmap mode, disconnecting
+it from the host stack, and/or defines which rings are controlled
+through this file descriptor.
+On return, it gives the same info as NIOCGINFO, and nr_ringid
+indicates the identity of the rings controlled through the file
+descriptor.
+.Pp
+Possible values for nr_ringid are
+.Bl -tag -width XXXXX
+.It 0
+default, all hardware rings
+.It NETMAP_SW_RING
+the ``host rings'' connecting to the host stack
+.It NETMAP_HW_RING + i
+the i-th hardware ring
+.El
+By default, a
+.Nm poll
+or
+.Nm select
+call pushes out any pending packets on the transmit ring, even if
+no write events are specified.
+The feature can be disabled by or-ing
+.Nm NETMAP_NO_TX_POLL
+to nr_ringid.
+But normally you should keep this feature unless you are using
+separate file descriptors for the send and receive rings, because
+otherwise packets are pushed out only if NIOCTXSYNC is called,
+or the send queue is full.
+.Pp
+.Pa NIOCREGIF
+can be used multiple times to change the association of a
+file descriptor to a ring pair, always within the same device.
+.It Dv NIOCUNREGIF
+brings an interface back to normal mode.
+.It Dv NIOCTXSYNC
+tells the hardware of new packets to transmit, and updates the
+number of slots available for transmission.
+.It Dv NIOCRXSYNC
+tells the hardware of consumed packets, and asks for newly available
+packets.
+.El
+.Ss SYSTEM CALLS
+.Nm
+uses
+.Nm select
+and
+.Nm poll
+to wake up processes when significant events occur.
+.Sh EXAMPLES
+The following code implements a traffic generator
+.Pp
+.Bd -literal -compact
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+struct netmap_if *nifp;
+struct netmap_ring *ring;
+struct nmreq nmr;
+
+fd = open("/dev/netmap", O_RDWR);
+bzero(&nmr, sizeof(nmr));
+strcpy(nmr.nr_name, "ix0");
+ioctl(fd, NIOCREGIF, &nmr);
+p = mmap(0, nmr.nr_memsize, fd);
+nifp = NETMAP_IF(p, nmr.nr_offset);
+ring = NETMAP_TXRING(nifp, 0);
+fds.fd = fd;
+fds.events = POLLOUT;
+for (;;) {
+    poll(&fds, 1, -1);
+    while (ring->avail-- > 0) {
+        i = ring->cur;
+        buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+        ... prepare packet in buf ...
+        ring->slot[i].len = ... packet length ...
+        ring->cur = NETMAP_RING_NEXT(ring, i);
+    }
+}
+.Ed
+.Sh SUPPORTED INTERFACES
+.Nm
+supports the following interfaces:
+.Xr em 4 ,
+.Xr ixgbe 4 ,
+.Xr re 4 .
+.Sh AUTHORS
+The
+.Nm
+framework has been designed and implemented by
+.An Luigi Rizzo
+and
+.An Matteo Landi
+in 2011 at the Universita` di Pisa.

Modified: stable/8/sys/conf/NOTES
==============================================================================
--- stable/8/sys/conf/NOTES     Tue Feb 14 22:27:43 2012        (r231716)
+++ stable/8/sys/conf/NOTES     Tue Feb 14 22:49:34 2012        (r231717)
@@ -780,6 +780,12 @@ device             sppp
 #  simultaneous BPF clients programs runnable.  DHCP requires bpf.
 device         bpf
 
+#  The `netmap' device implements memory-mapped access to network
+#  devices from userspace, enabling wire-speed packet capture and
+#  generation even at 10Gbit/s. Requires support in the device
+#  driver. Supported drivers are ixgbe, e1000, re.
+device         netmap
+
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing and benchmarking purposes.

Modified: stable/8/sys/conf/files
==============================================================================
--- stable/8/sys/conf/files     Tue Feb 14 22:27:43 2012        (r231716)
+++ stable/8/sys/conf/files     Tue Feb 14 22:49:34 2012        (r231717)
@@ -1385,6 +1385,7 @@ dev/mxge/mxge_rss_ethp_z8e.c      optional mx
 dev/my/if_my.c                 optional my
 dev/ncv/ncr53c500.c            optional ncv
 dev/ncv/ncr53c500_pccard.c     optional ncv pccard
+dev/netmap/netmap.c            optional netmap
 dev/nge/if_nge.c               optional nge
 dev/nxge/if_nxge.c             optional nxge
 dev/nxge/xgehal/xgehal-device.c        optional nxge

Modified: stable/8/sys/conf/options
==============================================================================
--- stable/8/sys/conf/options   Tue Feb 14 22:27:43 2012        (r231716)
+++ stable/8/sys/conf/options   Tue Feb 14 22:49:34 2012        (r231717)
@@ -680,6 +680,7 @@ ISAPNP                      opt_isa.h
 
 # various 'device presence' options.
 DEV_BPF                        opt_bpf.h
+DEV_NETMAP             opt_global.h
 DEV_MCA                        opt_mca.h
 DEV_CARP               opt_carp.h
 DEV_PTY                        opt_tty.h

Added: stable/8/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ stable/8/sys/dev/netmap/if_em_netmap.h      Tue Feb 14 22:49:34 2012        
(r231717)
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ *
+ * netmap changes for if_em.
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>    /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static void    em_netmap_block_tasks(struct adapter *);
+static void    em_netmap_unblock_tasks(struct adapter *);
+static int     em_netmap_reg(struct ifnet *, int onoff);
+static int     em_netmap_txsync(struct ifnet *, u_int, int);
+static int     em_netmap_rxsync(struct ifnet *, u_int, int);
+static void    em_netmap_lock_wrapper(struct ifnet *, int, u_int);
+
+/*
+ * Describe this em adapter to the netmap core: fill a zeroed
+ * struct netmap_adapter with the interface pointer, the ring sizes
+ * and the driver callbacks, then hand it to netmap_attach() together
+ * with the number of queues.
+ */
+static void
+em_netmap_attach(struct adapter *adapter)
+{
+       struct netmap_adapter tmpl;
+
+       /* start from all zeroes so that fields not set below stay 0 */
+       bzero(&tmpl, sizeof(tmpl));
+
+       /* driver entry points */
+       tmpl.nm_register = em_netmap_reg;
+       tmpl.nm_lock = em_netmap_lock_wrapper;
+       tmpl.nm_txsync = em_netmap_txsync;
+       tmpl.nm_rxsync = em_netmap_rxsync;
+
+       /* interface and ring geometry */
+       tmpl.ifp = adapter->ifp;
+       tmpl.num_tx_desc = adapter->num_tx_desc;
+       tmpl.num_rx_desc = adapter->num_rx_desc;
+       tmpl.separate_locks = 1;
+
+       netmap_attach(&tmpl, adapter->num_queues);
+}
+
+
+/*
+ * Lock dispatcher exported to the generic code.
+ *
+ * 'what' selects one of the NETMAP_{CORE,TX,RX}_{LOCK,UNLOCK}
+ * operations; 'queueid' picks the ring for the TX/RX variants.
+ * Any other value of 'what' is ignored.
+ */
+static void
+em_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
+{
+       struct adapter *adapter = ifp->if_softc;
+
+       ASSERT(queueid < adapter->num_queues);
+
+       if (what == NETMAP_CORE_LOCK) {
+               EM_CORE_LOCK(adapter);
+       } else if (what == NETMAP_CORE_UNLOCK) {
+               EM_CORE_UNLOCK(adapter);
+       } else if (what == NETMAP_TX_LOCK) {
+               EM_TX_LOCK(&adapter->tx_rings[queueid]);
+       } else if (what == NETMAP_TX_UNLOCK) {
+               EM_TX_UNLOCK(&adapter->tx_rings[queueid]);
+       } else if (what == NETMAP_RX_LOCK) {
+               EM_RX_LOCK(&adapter->rx_rings[queueid]);
+       } else if (what == NETMAP_RX_UNLOCK) {
+               EM_RX_UNLOCK(&adapter->rx_rings[queueid]);
+       }
+}
+
+
+/*
+ * Block the driver taskqueues and drain the tasks queued on them.
+ * With MSIX (adapter->msix > 1) this is done on the tq of every
+ * tx and rx ring, otherwise on the single adapter->tq.
+ *
+ * XXX do we need to block/unblock the tasks ?
+ */
+static void
+em_netmap_block_tasks(struct adapter *adapter)
+{
+       int q;
+
+       if (adapter->msix <= 1) {       /* legacy */
+               taskqueue_block(adapter->tq);
+               taskqueue_drain(adapter->tq, &adapter->link_task);
+               taskqueue_drain(adapter->tq, &adapter->que_task);
+               return;
+       }
+
+       /* MSIX: walk all the ring pairs */
+       for (q = 0; q < adapter->num_queues; q++) {
+               struct tx_ring *txr = &adapter->tx_rings[q];
+               struct rx_ring *rxr = &adapter->rx_rings[q];
+
+               taskqueue_block(txr->tq);
+               taskqueue_drain(txr->tq, &txr->tx_task);
+               taskqueue_block(rxr->tq);
+               taskqueue_drain(rxr->tq, &rxr->rx_task);
+       }
+}
+
+
+/*
+ * Counterpart of em_netmap_block_tasks(): unblock the taskqueues.
+ * With MSIX (adapter->msix > 1) the tq of every tx and rx ring is
+ * unblocked, otherwise only the single adapter->tq.
+ */
+static void
+em_netmap_unblock_tasks(struct adapter *adapter)
+{
+       if (adapter->msix > 1) {        /* MSIX */
+               struct tx_ring *txr = adapter->tx_rings;
+               struct rx_ring *rxr = adapter->rx_rings;
+               int i;
+
+               /*
+                * txr and rxr must advance together with i, as in
+                * em_netmap_block_tasks(); otherwise only the queues
+                * of ring 0 are unblocked.
+                */
+               for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+                       taskqueue_unblock(txr->tq);
+                       taskqueue_unblock(rxr->tq);
+               }
+       } else { /* legacy */
+               taskqueue_unblock(adapter->tq);
+       }
+}
+
+/*
+ * register-unregister routine
+ *
+ * onoff != 0 puts the interface in netmap mode, onoff == 0 brings it
+ * back to normal mode.  In both cases interrupts are disabled, the
+ * RUNNING/OACTIVE driver flags are cleared and the driver tasks are
+ * blocked before the switch, and the tasks are unblocked at the end.
+ *
+ * Entering netmap mode saves ifp->if_transmit in the netmap adapter
+ * and replaces it with netmap_start.  If, after em_init_locked(),
+ * neither IFF_DRV_RUNNING nor IFF_DRV_OACTIVE is set, the change is
+ * rolled back through the same steps used for onoff == 0 and ENOMEM
+ * is returned.
+ *
+ * Returns 0 on success, EINVAL if NA(ifp) is NULL.
+ */
+static int
+em_netmap_reg(struct ifnet *ifp, int onoff)
+{
+       struct adapter *adapter = ifp->if_softc;
+       struct netmap_adapter *na = NA(ifp);
+       const int drv_active = IFF_DRV_RUNNING | IFF_DRV_OACTIVE;
+       int error = 0;
+
+       if (na == NULL)
+               return EINVAL;  /* no netmap support here */
+
+       em_disable_intr(adapter);
+
+       /* Tell the stack that the interface is no longer active */
+       ifp->if_drv_flags &= ~drv_active;
+
+       em_netmap_block_tasks(adapter);
+
+       if (onoff) {
+               ifp->if_capenable |= IFCAP_NETMAP;
+
+               /* remember the original hook, then divert to netmap */
+               na->if_transmit = ifp->if_transmit;
+               ifp->if_transmit = netmap_start;
+
+               em_init_locked(adapter);
+               if ((ifp->if_drv_flags & drv_active) == 0)
+                       error = ENOMEM; /* rolled back just below */
+       }
+
+       if (!onoff || error != 0) {
+               /* restore if_transmit */
+               ifp->if_transmit = na->if_transmit;
+               ifp->if_capenable &= ~IFCAP_NETMAP;
+               em_init_locked(adapter);        /* also enable intr */
+       }
+
+       em_netmap_unblock_tasks(adapter);
+       return (error);
+}
+
+/*
+ * Reconcile hardware and user view of the transmit ring.
+ *
+ * ifp is the interface, ring_nr the index of the tx ring to sync;
+ * if do_lock is non-zero the ring's TX lock is held around the work.
+ *
+ * Slots between kring->nr_hwcur and ring->cur are written into the
+ * NIC descriptors and handed to the hardware by writing TDT.
+ * When nothing was queued, or nr_hwavail dropped below 1, TDH is
+ * read to account for completed transmissions.
+ * Finally ring->avail is refreshed from kring->nr_hwavail.
+ *
+ * Returns 0, or the value of netmap_ring_reinit() when ring->cur is
+ * out of range or a slot fails the buffer/length check.
+ */
+static int
+em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+       struct adapter *adapter = ifp->if_softc;
+       struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_kring *kring = &na->tx_rings[ring_nr];
+       struct netmap_ring *ring = kring->ring;
+       int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+
+       /* generate an interrupt approximately every half ring */
+       int report_frequency = kring->nkr_num_slots >> 1;
+
+       k = ring->cur;  /* value written by userspace, validate it */
+       if (k > lim)
+               return netmap_ring_reinit(kring);
+
+       if (do_lock)
+               EM_TX_LOCK(txr);
+       bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+                       BUS_DMASYNC_POSTREAD);
+
+       /* check for new packets to send.
+        * j indexes the netmap ring, l indexes the nic ring, and
+        *      j = kring->nr_hwcur, l = E1000_TDT (not tracked),
+        *      j == (l + kring->nkr_hwofs) % ring_size
+        */
+       j = kring->nr_hwcur;
+       if (j != k) {   /* we have packets to send */
+               l = j - kring->nkr_hwofs;
+               if (l < 0)
+                       l += lim + 1;
+               while (j != k) {
+                       struct netmap_slot *slot = &ring->slot[j];
+                       struct e1000_tx_desc *curr = &txr->tx_base[l];
+                       struct em_buffer *txbuf = &txr->tx_buffers[l];
+                       int flags = ((slot->flags & NS_REPORT) ||
+                               j == 0 || j == report_frequency) ?
+                                       E1000_TXD_CMD_RS : 0;
+                       uint64_t paddr;
+                       void *addr = PNMB(slot, &paddr);
+                       int len = slot->len;
+                       if (addr == netmap_buffer_base || len > 
NETMAP_BUF_SIZE) {
+                               if (do_lock)
+                                       EM_TX_UNLOCK(txr);
+                               return netmap_ring_reinit(kring);
+                       }
+
+                       slot->flags &= ~NS_REPORT;
+                       curr->upper.data = 0;
+                       curr->lower.data = 
+                           htole32(adapter->txd_cmd | len |
+                               (E1000_TXD_CMD_EOP | flags) );
+                       if (slot->flags & NS_BUF_CHANGED) {
+                               curr->buffer_addr = htole64(paddr);
+                               /* buffer has changed, reload map */
+                               netmap_reload_map(txr->txtag, txbuf->map, addr);
+                               slot->flags &= ~NS_BUF_CHANGED;
+                       }
+
+                       bus_dmamap_sync(txr->txtag, txbuf->map,
+                               BUS_DMASYNC_PREWRITE);
+                       j = (j == lim) ? 0 : j + 1;
+                       l = (l == lim) ? 0 : l + 1;
+                       n++;
+               }
+               kring->nr_hwcur = k;
+
+               /* decrease avail by number of sent packets */
+               kring->nr_hwavail -= n;
+
+               bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+                   BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+               E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+       }
+
+       if (n == 0 || kring->nr_hwavail < 1) {
+               int delta;
+
+               /* record completed transmissions using TDH. */
+               l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+               if (l >= kring->nkr_num_slots) { /* XXX can happen */
+                       D("TDH wrap %d", l);
+                       l -= kring->nkr_num_slots;
+               }
+               delta = l - txr->next_to_clean;
+               if (delta) {
+                       /* some completed, increment hwavail. */
+                       if (delta < 0)
+                               delta += kring->nkr_num_slots;
+                       txr->next_to_clean = l;
+                       kring->nr_hwavail += delta;
+               }
+       }
+       /* update avail to what the hardware knows */
+       ring->avail = kring->nr_hwavail;
+
+       if (do_lock)
+               EM_TX_UNLOCK(txr);
+       return 0;
+}
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ *
+ * ifp is the interface, ring_nr the index of the rx ring to sync;
+ * if do_lock is non-zero the ring's RX lock is held around the work.
+ *
+ * First, descriptors with the DD status bit set are imported starting
+ * at rxr->next_to_check: their length is stored in the matching
+ * netmap slot and kring->nr_hwavail grows by the number imported.
+ * Then the slots between kring->nr_hwcur and ring->cur, which
+ * userspace has released, are given back to the NIC and RDT is
+ * written, keeping one slot free.
+ * Finally ring->avail is refreshed from kring->nr_hwavail.
+ *
+ * Returns 0, or the value of netmap_ring_reinit() when ring->cur is
+ * out of range or a released slot fails the buffer check.
+ */
+static int
+em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+       struct adapter *adapter = ifp->if_softc;
+       struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_kring *kring = &na->rx_rings[ring_nr];
+       struct netmap_ring *ring = kring->ring;
+       int j, k, l, n, lim = kring->nkr_num_slots - 1;
+
+       k = ring->cur;  /* value written by userspace, validate it */
+       if (k > lim)
+               return netmap_ring_reinit(kring);
+ 
+       if (do_lock)
+               EM_RX_LOCK(rxr);
+       /* XXX check sync modes */
+       bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+                       BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+       /* import newly received packets into the netmap ring.
+        * j is an index in the netmap ring, l in the NIC ring, and
+        *      j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+        *      l = rxr->next_to_check;
+        * and
+        *      j == (l + kring->nkr_hwofs) % ring_size
+        */
+       l = rxr->next_to_check;
+       j = l + kring->nkr_hwofs;
+       /* here nkr_hwofs can be negative so must check for j < 0 */
+       if (j < 0)
+               j += lim + 1;
+       else if (j > lim)
+               j -= lim + 1;
+       for (n = 0; ; n++) {
+               struct e1000_rx_desc *curr = &rxr->rx_base[l];
+
+               if ((curr->status & E1000_RXD_STAT_DD) == 0)
+                       break;
+               ring->slot[j].len = le16toh(curr->length);
+               bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
+                       BUS_DMASYNC_POSTREAD);
+               j = (j == lim) ? 0 : j + 1;
+               /* make sure next_to_refresh follows next_to_check */
+               rxr->next_to_refresh = l;       // XXX
+               l = (l == lim) ? 0 : l + 1;
+       }
+       if (n) {        /* n descriptors were imported by the loop above */
+               rxr->next_to_check = l;
+               kring->nr_hwavail += n;
+       }
+
+       /* skip past packets that userspace has already processed */
+       j = kring->nr_hwcur;
+       if (j != k) { /* userspace has read some packets. */
+               n = 0;
+               l = j - kring->nkr_hwofs; /* NIC ring index */
+               /* here nkr_hwofs can be negative so check for l > lim */
+               if (l < 0)
+                       l += lim + 1;
+               else if (l > lim)
+                       l -= lim + 1;
+               while (j != k) {
+                       struct netmap_slot *slot = &ring->slot[j];
+                       struct e1000_rx_desc *curr = &rxr->rx_base[l];
+                       struct em_buffer *rxbuf = &rxr->rx_buffers[l];
+                       uint64_t paddr;
+                       void *addr = PNMB(slot, &paddr);
+
+                       if (addr == netmap_buffer_base) { /* bad buf */
+                               if (do_lock)
+                                       EM_RX_UNLOCK(rxr);
+                               return netmap_ring_reinit(kring);
+                       }
+
+                       curr->status = 0;
+                       if (slot->flags & NS_BUF_CHANGED) {
+                               curr->buffer_addr = htole64(paddr);
+                               /* buffer has changed, reload map */
+                               netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
+                               slot->flags &= ~NS_BUF_CHANGED;
+                       }
+
+                       bus_dmamap_sync(rxr->rxtag, rxbuf->map,
+                           BUS_DMASYNC_PREREAD);
+
+                       j = (j == lim) ? 0 : j + 1;
+                       l = (l == lim) ? 0 : l + 1;
+                       n++;
+               }
+               kring->nr_hwavail -= n;
+               kring->nr_hwcur = k;
+               bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+                   BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+               /*
+                * IMPORTANT: we must leave one free slot in the ring,
+                * so move l back by one unit
+                */
+               l = (l == 0) ? lim : l - 1;
+               E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+       }
+       /* tell userspace that there are new packets */
+       ring->avail = kring->nr_hwavail ;
+       if (do_lock)
+               EM_RX_UNLOCK(rxr);
+       return 0;
+}

Added: stable/8/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ stable/8/sys/dev/netmap/if_igb_netmap.h     Tue Feb 14 22:49:34 2012        
(r231717)
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2011 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ *
+ * netmap modifications for igb
+ * contributed by Ahmed Kooli
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>    /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int     igb_netmap_reg(struct ifnet *, int onoff);
+static int     igb_netmap_txsync(struct ifnet *, u_int, int);
+static int     igb_netmap_rxsync(struct ifnet *, u_int, int);
+static void    igb_netmap_lock_wrapper(struct ifnet *, int, u_int);
+
+
+/*
+ * Build a netmap_adapter describing this igb adapter (its ifnet, the
+ * tx/rx descriptor counts and the igb-specific callbacks) and pass it
+ * to netmap_attach() together with the number of queues.
+ */
+static void
+igb_netmap_attach(struct adapter *adapter)
+{
+       struct netmap_adapter desc;
+
+       /* start from an all-zero descriptor */
+       bzero(&desc, sizeof(desc));
+
+       /* igb-specific callbacks used by the generic code */
+       desc.nm_register = igb_netmap_reg;
+       desc.nm_lock = igb_netmap_lock_wrapper;
+       desc.nm_txsync = igb_netmap_txsync;
+       desc.nm_rxsync = igb_netmap_rxsync;
+
+       /* interface and ring geometry */
+       desc.ifp = adapter->ifp;
+       desc.num_tx_desc = adapter->num_tx_desc;
+       desc.num_rx_desc = adapter->num_rx_desc;
+       desc.separate_locks = 1;
+
+       netmap_attach(&desc, adapter->num_queues);
+}
+
+
+/*
+ * Lock callback exported to the generic netmap code.
+ *
+ * Maps each NETMAP_*_LOCK / NETMAP_*_UNLOCK request in 'what' onto the
+ * matching igb core, tx-ring or rx-ring lock macro. 'queueid' selects
+ * the ring for the TX and RX requests. Any other value of 'what' is
+ * silently ignored.
+ */
+static void
+igb_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
+{
+       struct adapter *sc = ifp->if_softc;
+
+       ASSERT(queueid < sc->num_queues);
+
+       switch (what) {
+       /* adapter-wide lock */
+       case NETMAP_CORE_LOCK:
+               IGB_CORE_LOCK(sc);
+               break;
+       case NETMAP_CORE_UNLOCK:
+               IGB_CORE_UNLOCK(sc);
+               break;
+
+       /* per-queue transmit ring lock */
+       case NETMAP_TX_LOCK:
+               IGB_TX_LOCK(&sc->tx_rings[queueid]);
+               break;
+       case NETMAP_TX_UNLOCK:
+               IGB_TX_UNLOCK(&sc->tx_rings[queueid]);
+               break;
+
+       /* per-queue receive ring lock */
+       case NETMAP_RX_LOCK:
+               IGB_RX_LOCK(&sc->rx_rings[queueid]);
+               break;
+       case NETMAP_RX_UNLOCK:
+               IGB_RX_UNLOCK(&sc->rx_rings[queueid]);
+               break;
+
+       default:
+               /* unknown request: nothing to do */
+               break;
+       }
+}
+
+
+/*
+ * Support for netmap register/unregister: switch the interface into
+ * (onoff != 0) or out of (onoff == 0) netmap mode.
+ * We are already under the core lock; this is only called on the first
+ * init or the last unregister.
+ *
+ * Returns 0 on success, EINVAL if the interface has no netmap adapter,
+ * or ENOMEM if neither IFF_DRV_RUNNING nor IFF_DRV_OACTIVE is set after
+ * re-initializing in netmap mode. In that last case the saved
+ * if_transmit is restored and the interface is re-initialized again
+ * before returning.
+ */
+static int
+igb_netmap_reg(struct ifnet *ifp, int onoff)
+{
+       struct adapter *sc = ifp->if_softc;
+       struct netmap_adapter *na = NA(ifp);
+       int error = 0;
+
+       if (na == NULL)
+               return EINVAL;
+
+       igb_disable_intr(sc);
+
+       /* Tell the stack that the interface is no longer active */
+       ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+       if (onoff) {
+               ifp->if_capenable |= IFCAP_NETMAP;
+
+               /* remember the original if_transmit so it can be put back */
+               na->if_transmit = ifp->if_transmit;
+               ifp->if_transmit = netmap_start;
+
+               igb_init_locked(sc);
+               if (ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE))
+                       return (error);
+
+               /* neither flag is set after init: undo the switch below */
+               error = ENOMEM;
+       }
+
+       /*
+        * Leaving netmap mode, either on request or after a failed
+        * switch: restore if_transmit and restart the interface.
+        */
+       ifp->if_transmit = na->if_transmit;
+       ifp->if_capenable &= ~IFCAP_NETMAP;
+       igb_init_locked(sc);    /* also enables intr */
+       return (error);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ */
+static int
+igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+       struct adapter *adapter = ifp->if_softc;
+       struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_kring *kring = &na->tx_rings[ring_nr];
+       struct netmap_ring *ring = kring->ring;
+       int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+
+       /* generate an interrupt approximately every half ring */
+       int report_frequency = kring->nkr_num_slots >> 1;
+
+       k = ring->cur;
+       if (k > lim)
+               return netmap_ring_reinit(kring);
+
+       if (do_lock)
+               IGB_TX_LOCK(txr);
+       bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+           BUS_DMASYNC_POSTREAD);
+
+       /* update avail to what the hardware knows */
+       ring->avail = kring->nr_hwavail;
+
+       j = kring->nr_hwcur; /* netmap ring index */
+       if (j != k) {   /* we have new packets to send */
+               u32 olinfo_status = 0;
+
+               l = j - kring->nkr_hwofs; /* NIC ring index */
+               if (l < 0)
+                       l += lim + 1;
+               /* 82575 needs the queue index added */
+               if (adapter->hw.mac.type == e1000_82575)
+                       olinfo_status |= txr->me << 4;
+
+               while (j != k) {
+                       struct netmap_slot *slot = &ring->slot[j];
+                       struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
+                       union e1000_adv_tx_desc *curr =
+                           (union e1000_adv_tx_desc *)&txr->tx_base[l];
+                       uint64_t paddr;
+                       void *addr = PNMB(slot, &paddr);
+                       int flags = ((slot->flags & NS_REPORT) ||
+                               j == 0 || j == report_frequency) ?
+                                       E1000_ADVTXD_DCMD_RS : 0;
+                       int len = slot->len;
+
+                       if (addr == netmap_buffer_base || len > 
NETMAP_BUF_SIZE) {
+                               if (do_lock)
+                                       IGB_TX_UNLOCK(txr);
+                               return netmap_ring_reinit(kring);
+                       }
+
+                       slot->flags &= ~NS_REPORT;
+                       // XXX do we need to set the address ?
+                       curr->read.buffer_addr = htole64(paddr);
+                       curr->read.olinfo_status =
+                           htole32(olinfo_status |
+                               (len<< E1000_ADVTXD_PAYLEN_SHIFT));
+                       curr->read.cmd_type_len =
+                           htole32(len | E1000_ADVTXD_DTYP_DATA |
+                                   E1000_ADVTXD_DCMD_IFCS |
+                                   E1000_ADVTXD_DCMD_DEXT |
+                                   E1000_ADVTXD_DCMD_EOP | flags);
+                       if (slot->flags & NS_BUF_CHANGED) {
+                               /* buffer has changed, reload map */
+                               netmap_reload_map(txr->txtag, txbuf->map, addr);
+                               slot->flags &= ~NS_BUF_CHANGED;
+                       }
+
+                       bus_dmamap_sync(txr->txtag, txbuf->map,
+                               BUS_DMASYNC_PREWRITE);
+                       j = (j == lim) ? 0 : j + 1;
+                       l = (l == lim) ? 0 : l + 1;
+                       n++;
+               }
+               kring->nr_hwcur = k;
+
+               /* decrease avail by number of sent packets */
+               kring->nr_hwavail -= n;
+               ring->avail = kring->nr_hwavail;
+
+               /* Set the watchdog XXX ? */
+               txr->queue_status = IGB_QUEUE_WORKING;
+               txr->watchdog_time = ticks;
+
+               bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+                   BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+               E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+       }
+       if (n == 0 || kring->nr_hwavail < 1) {
+               int delta;
+
+               /* record completed transmission using TDH */
+               l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+               if (l >= kring->nkr_num_slots) /* XXX can it happen ? */
+                       l -= kring->nkr_num_slots;
+               delta = l - txr->next_to_clean;
+               if (delta) {
+                       /* new tx were completed */
+                       if (delta < 0)
+                               delta += kring->nkr_num_slots;
+                       txr->next_to_clean = l;
+                       kring->nr_hwavail += delta;
+                       ring->avail = kring->nr_hwavail;
+               }
+       }

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to