Author: luigi
Date: Tue Feb  5 09:40:31 2013
New Revision: 246355
URL: http://svnweb.freebsd.org/changeset/base/246355

Log:
  MFH: sync netmap with the version in HEAD

Deleted:
  stable/9/sys/dev/netmap/netmap_mem1.c
Modified:
  stable/9/sys/dev/netmap/if_em_netmap.h
  stable/9/sys/dev/netmap/if_igb_netmap.h
  stable/9/sys/dev/netmap/if_lem_netmap.h
  stable/9/sys/dev/netmap/if_re_netmap.h
  stable/9/sys/dev/netmap/netmap.c
  stable/9/sys/dev/netmap/netmap_kern.h
  stable/9/sys/dev/netmap/netmap_mem2.c
  stable/9/sys/net/netmap.h
  stable/9/sys/net/netmap_user.h

Modified: stable/9/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_em_netmap.h      Tue Feb  5 05:16:02 2013        (r246354)
+++ stable/9/sys/dev/netmap/if_em_netmap.h      Tue Feb  5 09:40:31 2013        (r246355)
@@ -171,7 +171,7 @@ em_netmap_txsync(struct ifnet *ifp, u_in
        u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
        /* generate an interrupt approximately every half ring */
-       int report_frequency = kring->nkr_num_slots >> 1;
+       u_int report_frequency = kring->nkr_num_slots >> 1;
 
        k = ring->cur;
        if (k > lim)
@@ -292,6 +292,8 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
        l = rxr->next_to_check;
        j = netmap_idx_n2k(kring, l);
        if (netmap_no_pendintr || force_update) {
+               uint16_t slot_flags = kring->nkr_slot_flags;
+
                for (n = 0; ; n++) {
                        struct e1000_rx_desc *curr = &rxr->rx_base[l];
                        uint32_t staterr = le32toh(curr->status);
@@ -299,6 +301,7 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
                        if ((staterr & E1000_RXD_STAT_DD) == 0)
                                break;
                        ring->slot[j].len = le16toh(curr->length);
+                       ring->slot[j].flags = slot_flags;
                        bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
                                BUS_DMASYNC_POSTREAD);
                        j = (j == lim) ? 0 : j + 1;

Modified: stable/9/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_igb_netmap.h     Tue Feb  5 05:16:02 2013        (r246354)
+++ stable/9/sys/dev/netmap/if_igb_netmap.h     Tue Feb  5 09:40:31 2013        (r246355)
@@ -125,7 +125,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
        u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
        /* generate an interrupt approximately every half ring */
-       int report_frequency = kring->nkr_num_slots >> 1;
+       u_int report_frequency = kring->nkr_num_slots >> 1;
 
        k = ring->cur;
        if (k > lim)
@@ -263,6 +263,8 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
        l = rxr->next_to_check;
        j = netmap_idx_n2k(kring, l);
        if (netmap_no_pendintr || force_update) {
+               uint16_t slot_flags = kring->nkr_slot_flags;
+
                for (n = 0; ; n++) {
                        union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
                        uint32_t staterr = le32toh(curr->wb.upper.status_error);
@@ -270,6 +272,7 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
                        if ((staterr & E1000_RXD_STAT_DD) == 0)
                                break;
                        ring->slot[j].len = le16toh(curr->wb.upper.length);
+                       ring->slot[j].flags = slot_flags;
                        bus_dmamap_sync(rxr->ptag,
                                rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
                        j = (j == lim) ? 0 : j + 1;

Modified: stable/9/sys/dev/netmap/if_lem_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_lem_netmap.h     Tue Feb  5 05:16:02 2013        (r246354)
+++ stable/9/sys/dev/netmap/if_lem_netmap.h     Tue Feb  5 09:40:31 2013        (r246355)
@@ -253,6 +253,8 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
        l = adapter->next_rx_desc_to_check;
        j = netmap_idx_n2k(kring, l);
        if (netmap_no_pendintr || force_update) {
+               uint16_t slot_flags = kring->nkr_slot_flags;
+
                for (n = 0; ; n++) {
                        struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
                        uint32_t staterr = le32toh(curr->status);
@@ -266,6 +268,7 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
                                len = 0;
                        }
                        ring->slot[j].len = len;
+                       ring->slot[j].flags = slot_flags;
                        bus_dmamap_sync(adapter->rxtag,
                                adapter->rx_buffer_area[l].map,
                                    BUS_DMASYNC_POSTREAD);

Modified: stable/9/sys/dev/netmap/if_re_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_re_netmap.h      Tue Feb  5 05:16:02 2013        (r246354)
+++ stable/9/sys/dev/netmap/if_re_netmap.h      Tue Feb  5 09:40:31 2013        (r246355)
@@ -245,6 +245,8 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
        l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
        j = netmap_idx_n2k(kring, l); /* the kring index */
        if (netmap_no_pendintr || force_update) {
+               uint16_t slot_flags = kring->nkr_slot_flags;
+
                for (n = kring->nr_hwavail; n < lim ; n++) {
                        struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
                        uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
@@ -256,6 +258,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
                        /* XXX subtract crc */
                        total_len = (total_len < 4) ? 0 : total_len - 4;
                        kring->ring->slot[j].len = total_len;
+                       kring->ring->slot[j].flags = slot_flags;
                        /*  sync was in re_newbuf() */
                        bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
                            rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);

Modified: stable/9/sys/dev/netmap/netmap.c
==============================================================================
--- stable/9/sys/dev/netmap/netmap.c    Tue Feb  5 05:16:02 2013        (r246354)
+++ stable/9/sys/dev/netmap/netmap.c    Tue Feb  5 09:40:31 2013        (r246355)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -23,6 +23,8 @@
  * SUCH DAMAGE.
  */
 
+#define NM_BRIDGE
+
 /*
  * This module supports memory mapped access to network devices,
  * see netmap(4).
@@ -52,6 +54,16 @@
  *    transmit or receive queues (or all queues for a given interface).
  */
 
+#ifdef linux
+#include "bsd_glue.h"
+static netdev_tx_t linux_netmap_start(struct sk_buff *skb, struct net_device *dev);
+#endif /* linux */
+
+#ifdef __APPLE__
+#include "osx_glue.h"
+#endif /* __APPLE__ */
+
+#ifdef __FreeBSD__
 #include <sys/cdefs.h> /* prerequisite */
 __FBSDID("$FreeBSD$");
 
@@ -78,21 +90,16 @@ __FBSDID("$FreeBSD$");
 #include <net/if.h>
 #include <net/bpf.h>           /* BIOCIMMEDIATE */
 #include <net/vnet.h>
-#include <net/netmap.h>
-#include <dev/netmap/netmap_kern.h>
 #include <machine/bus.h>       /* bus_dmamap_* */
 
 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+#endif /* __FreeBSD__ */
 
-/*
- * lock and unlock for the netmap memory allocator
- */
-#define NMA_LOCK()     mtx_lock(&nm_mem->nm_mtx);
-#define NMA_UNLOCK()   mtx_unlock(&nm_mem->nm_mtx);
-struct netmap_mem_d;
-static struct netmap_mem_d *nm_mem;    /* Our memory allocator. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
 
 u_int netmap_total_buffers;
+u_int netmap_buf_size;
 char *netmap_buffer_base;      /* address of an invalid buffer */
 
 /* user-controlled variables */
@@ -105,16 +112,215 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbos
     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
 SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
-int netmap_buf_size = 2048;
-TUNABLE_INT("hw.netmap.buf_size", &netmap_buf_size);
-SYSCTL_INT(_dev_netmap, OID_AUTO, buf_size,
-    CTLFLAG_RD, &netmap_buf_size, 0, "Size of packet buffers");
 int netmap_mitigate = 1;
 SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
 int netmap_no_pendintr = 1;
 SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
 
+int netmap_drop = 0;   /* debugging */
+int netmap_flags = 0;  /* debug flags */
+int netmap_fwd = 0;    /* force transparent mode */
+int netmap_copy = 0;   /* debugging, copy content */
+
+SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
+
+#ifdef NM_BRIDGE /* support for netmap bridge */
+
+/*
+ * system parameters.
+ *
+ * All switched ports have prefix NM_NAME.
+ * The switch has a max of NM_BDG_MAXPORTS ports (often stored in a bitmap,
+ * so a practical upper bound is 64).
+ * Each tx ring is read-write, whereas rx rings are readonly (XXX not done yet).
+ * The virtual interfaces use per-queue lock instead of core lock.
+ * In the tx loop, we aggregate traffic in batches to make all operations
+ * faster. The batch size is NM_BDG_BATCH
+ */
+#define        NM_NAME                 "vale"  /* prefix for the interface */
+#define NM_BDG_MAXPORTS                16      /* up to 64 ? */
+#define NM_BRIDGE_RINGSIZE     1024    /* in the device */
+#define NM_BDG_HASH            1024    /* forwarding table entries */
+#define NM_BDG_BATCH           1024    /* entries in the forwarding buffer */
+#define        NM_BRIDGES              4       /* number of bridges */
+int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
+
+#ifdef linux
+#define        ADD_BDG_REF(ifp)        (NA(ifp)->if_refcount++)
+#define        DROP_BDG_REF(ifp)       (NA(ifp)->if_refcount-- <= 1)
+#else /* !linux */
+#define        ADD_BDG_REF(ifp)        (ifp)->if_refcount++
+#define        DROP_BDG_REF(ifp)       refcount_release(&(ifp)->if_refcount)
+#ifdef __FreeBSD__
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#endif /* __FreeBSD__ */
+#define prefetch(x)    __builtin_prefetch(x)
+#endif /* !linux */
+
+static void bdg_netmap_attach(struct ifnet *ifp);
+static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
+/* per-tx-queue entry */
+struct nm_bdg_fwd {    /* forwarding entry for a bridge */
+       void *buf;
+       uint64_t dst;   /* dst mask */
+       uint32_t src;   /* src index ? */
+       uint16_t len;   /* src len */
+};
+
+struct nm_hash_ent {
+       uint64_t        mac;    /* the top 2 bytes are the epoch */
+       uint64_t        ports;
+};
+
+/*
+ * Interfaces for a bridge are all in ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search.
+ */
+struct nm_bridge {
+       struct ifnet *bdg_ports[NM_BDG_MAXPORTS];
+       int n_ports;
+       uint64_t act_ports;
+       int freelist;   /* first buffer index */
+       NM_SELINFO_T si;        /* poll/select wait queue */
+       NM_LOCK_T bdg_lock;     /* protect the selinfo ? */
+
+       /* the forwarding table, MAC+ports */
+       struct nm_hash_ent ht[NM_BDG_HASH];
+
+       int namelen;    /* 0 means free */
+       char basename[IFNAMSIZ];
+};
+
+struct nm_bridge nm_bridges[NM_BRIDGES];
+
+#define BDG_LOCK(b)    mtx_lock(&(b)->bdg_lock)
+#define BDG_UNLOCK(b)  mtx_unlock(&(b)->bdg_lock)
+
+/*
+ * NA(ifp)->bdg_port   port index
+ */
+
+// XXX only for multiples of 64 bytes, non overlapped.
+static inline void
+pkt_copy(void *_src, void *_dst, int l)
+{
+        uint64_t *src = _src;
+        uint64_t *dst = _dst;
+        if (unlikely(l >= 1024)) {
+                bcopy(src, dst, l);
+                return;
+        }
+        for (; likely(l > 0); l-=64) {
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+        }
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+static struct nm_bridge *
+nm_find_bridge(const char *name)
+{
+       int i, l, namelen, e;
+       struct nm_bridge *b = NULL;
+
+       namelen = strlen(NM_NAME);      /* base length */
+       l = strlen(name);               /* actual length */
+       for (i = namelen + 1; i < l; i++) {
+               if (name[i] == ':') {
+                       namelen = i;
+                       break;
+               }
+       }
+       if (namelen >= IFNAMSIZ)
+               namelen = IFNAMSIZ;
+       ND("--- prefix is '%.*s' ---", namelen, name);
+
+       /* use the first entry for locking */
+       BDG_LOCK(nm_bridges); // XXX do better
+       for (e = -1, i = 1; i < NM_BRIDGES; i++) {
+               b = nm_bridges + i;
+               if (b->namelen == 0)
+                       e = i;  /* record empty slot */
+               else if (strncmp(name, b->basename, namelen) == 0) {
+                       ND("found '%.*s' at %d", namelen, name, i);
+                       break;
+               }
+       }
+       if (i == NM_BRIDGES) { /* all full */
+               if (e == -1) { /* no empty slot */
+                       b = NULL;
+               } else {
+                       b = nm_bridges + e;
+                       strncpy(b->basename, name, namelen);
+                       b->namelen = namelen;
+               }
+       }
+       BDG_UNLOCK(nm_bridges);
+       return b;
+}
+#endif /* NM_BRIDGE */
+
+
+/*
+ * Fetch configuration from the device, to cope with dynamic
+ * reconfigurations after loading the module.
+ */
+static int
+netmap_update_config(struct netmap_adapter *na)
+{
+       struct ifnet *ifp = na->ifp;
+       u_int txr, txd, rxr, rxd;
+
+       txr = txd = rxr = rxd = 0;
+       if (na->nm_config) {
+               na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
+       } else {
+               /* take whatever we had at init time */
+               txr = na->num_tx_rings;
+               txd = na->num_tx_desc;
+               rxr = na->num_rx_rings;
+               rxd = na->num_rx_desc;
+       }       
+
+       if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
+           na->num_rx_rings == rxr && na->num_rx_desc == rxd)
+               return 0; /* nothing changed */
+       if (netmap_verbose || na->refcount > 0) {
+               D("stored config %s: txring %d x %d, rxring %d x %d",
+                       ifp->if_xname,
+                       na->num_tx_rings, na->num_tx_desc,
+                       na->num_rx_rings, na->num_rx_desc);
+               D("new config %s: txring %d x %d, rxring %d x %d",
+                       ifp->if_xname, txr, txd, rxr, rxd);
+       }
+       if (na->refcount == 0) {
+               D("configuration changed (but fine)");
+               na->num_tx_rings = txr;
+               na->num_tx_desc = txd;
+               na->num_rx_rings = rxr;
+               na->num_rx_desc = rxd;
+               return 0;
+       }
+       D("configuration changed while active, this is bad...");
+       return 1;
+}
 
 /*------------- memory allocator -----------------*/
 #ifdef NETMAP_MEM2
@@ -124,23 +330,62 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen
 #endif /* !NETMAP_MEM2 */
 /*------------ end of memory allocator ----------*/
 
-/* Structure associated to each thread which registered an interface. */
+
+/* Structure associated to each thread which registered an interface.
+ *
+ * The first 4 fields of this structure are written by NIOCREGIF and
+ * read by poll() and NIOC?XSYNC.
+ * There is low contention among writers (actually, a correct user program
+ * should have no contention among writers) and among writers and readers,
+ * so we use a single global lock to protect the structure initialization.
+ * Since initialization involves the allocation of memory, we reuse the memory
+ * allocator lock.
+ * Read access to the structure is lock free. Readers must check that
+ * np_nifp is not NULL before using the other fields.
+ * If np_nifp is NULL initialization has not been performed, so they should
+ * return an error to userlevel.
+ *
+ * The ref_done field is used to regulate access to the refcount in the
+ * memory allocator. The refcount must be incremented at most once for
+ * each open("/dev/netmap"). The increment is performed by the first
+ * function that calls netmap_get_memory() (currently called by
+ * mmap(), NIOCGINFO and NIOCREGIF).
+ * If the refcount is incremented, it is then decremented when the
+ * private structure is destroyed.
+ */
 struct netmap_priv_d {
-       struct netmap_if *np_nifp;      /* netmap interface descriptor. */
+       struct netmap_if * volatile np_nifp;    /* netmap interface descriptor. */
 
        struct ifnet    *np_ifp;        /* device for which we hold a reference */
        int             np_ringid;      /* from the ioctl */
        u_int           np_qfirst, np_qlast;    /* range of rings to scan */
        uint16_t        np_txpoll;
+
+       unsigned long   ref_done;       /* use with NMA_LOCK held */
 };
 
 
+static int
+netmap_get_memory(struct netmap_priv_d* p)
+{
+       int error = 0;
+       NMA_LOCK();
+       if (!p->ref_done) {
+               error = netmap_memory_finalize();
+               if (!error)
+                       p->ref_done = 1;
+       }
+       NMA_UNLOCK();
+       return error;
+}
+
 /*
  * File descriptor's private data destructor.
  *
  * Call nm_register(ifp,0) to stop netmap mode on the interface and
  * revert to normal operation. We expect that np_ifp has not gone.
  */
+/* call with NMA_LOCK held */
 static void
 netmap_dtor_locked(void *data)
 {
@@ -153,7 +398,8 @@ netmap_dtor_locked(void *data)
        if (na->refcount <= 0) {        /* last instance */
                u_int i, j, lim;
 
-               D("deleting last netmap instance for %s", ifp->if_xname);
+               if (netmap_verbose)
+                       D("deleting last instance for %s", ifp->if_xname);
                /*
                 * there is a race here with *_netmap_task() and
                 * netmap_poll(), which don't run under NETMAP_REG_LOCK.
@@ -180,7 +426,6 @@ netmap_dtor_locked(void *data)
                selwakeuppri(&na->tx_si, PI_NET);
                selwakeuppri(&na->rx_si, PI_NET);
                /* release all buffers */
-               NMA_LOCK();
                for (i = 0; i < na->num_tx_rings + 1; i++) {
                        struct netmap_ring *ring = na->tx_rings[i].ring;
                        lim = na->tx_rings[i].nkr_num_slots;
@@ -200,30 +445,136 @@ netmap_dtor_locked(void *data)
                /* XXX kqueue(9) needed; these will mirror knlist_init. */
                /* knlist_destroy(&na->tx_si.si_note); */
                /* knlist_destroy(&na->rx_si.si_note); */
-               NMA_UNLOCK();
                netmap_free_rings(na);
                wakeup(na);
        }
        netmap_if_free(nifp);
 }
 
+static void
+nm_if_rele(struct ifnet *ifp)
+{
+#ifndef NM_BRIDGE
+       if_rele(ifp);
+#else /* NM_BRIDGE */
+       int i, full;
+       struct nm_bridge *b;
+
+       if (strncmp(ifp->if_xname, NM_NAME, sizeof(NM_NAME) - 1)) {
+               if_rele(ifp);
+               return;
+       }
+       if (!DROP_BDG_REF(ifp))
+               return;
+       b = ifp->if_bridge;
+       BDG_LOCK(nm_bridges);
+       BDG_LOCK(b);
+       ND("want to disconnect %s from the bridge", ifp->if_xname);
+       full = 0;
+       for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+               if (b->bdg_ports[i] == ifp) {
+                       b->bdg_ports[i] = NULL;
+                       bzero(ifp, sizeof(*ifp));
+                       free(ifp, M_DEVBUF);
+                       break;
+               }
+               else if (b->bdg_ports[i] != NULL)
+                       full = 1;
+       }
+       BDG_UNLOCK(b);
+       if (full == 0) {
+               ND("freeing bridge %d", b - nm_bridges);
+               b->namelen = 0;
+       }
+       BDG_UNLOCK(nm_bridges);
+       if (i == NM_BDG_MAXPORTS)
+               D("ouch, cannot find ifp to remove");
+#endif /* NM_BRIDGE */
+}
 
 static void
 netmap_dtor(void *data)
 {
        struct netmap_priv_d *priv = data;
        struct ifnet *ifp = priv->np_ifp;
-       struct netmap_adapter *na = NA(ifp);
+       struct netmap_adapter *na;
 
-       na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
-       netmap_dtor_locked(data);
-       na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
+       NMA_LOCK();
+       if (ifp) {
+               na = NA(ifp);
+               na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
+               netmap_dtor_locked(data);
+               na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
 
-       if_rele(ifp);
+               nm_if_rele(ifp);
+       }
+       if (priv->ref_done) {
+               netmap_memory_deref();
+       }
+       NMA_UNLOCK();
        bzero(priv, sizeof(*priv));     /* XXX for safety */
        free(priv, M_DEVBUF);
 }
 
+#ifdef __FreeBSD__
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+static struct cdev_pager_ops saved_cdev_pager_ops;
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+       if (netmap_verbose)
+               D("first mmap for %p", handle);
+       return saved_cdev_pager_ops.cdev_pg_ctor(handle,
+                       size, prot, foff, cred, color);
+}
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+       saved_cdev_pager_ops.cdev_pg_dtor(handle);
+       ND("ready to release memory for %p", handle);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+        .cdev_pg_ctor = netmap_dev_pager_ctor,
+        .cdev_pg_dtor = netmap_dev_pager_dtor,
+        .cdev_pg_fault = NULL,
+};
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+       vm_size_t objsize,  vm_object_t *objp, int prot)
+{
+       vm_object_t obj;
+
+       ND("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+           (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+       obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
+            curthread->td_ucred);
+       ND("returns obj %p", obj);
+       if (obj == NULL)
+               return EINVAL;
+       if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
+               ND("initialize cdev_pager_ops");
+               saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
+               netmap_cdev_pager_ops.cdev_pg_fault =
+                       saved_cdev_pager_ops.cdev_pg_fault;
+       };
+       obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
+       *objp = obj;
+       return 0;
+}
+#endif /* __FreeBSD__ */
+
 
 /*
  * mmap(2) support for the "netmap" device.
@@ -235,6 +586,7 @@ netmap_dtor(void *data)
  * Return 0 on success, -1 otherwise.
  */
 
+#ifdef __FreeBSD__
 static int
 netmap_mmap(__unused struct cdev *dev,
 #if __FreeBSD_version < 900000
@@ -245,75 +597,222 @@ netmap_mmap(__unused struct cdev *dev,
 #endif
        )
 {
+       int error = 0;
+       struct netmap_priv_d *priv;
+
        if (nprot & PROT_EXEC)
                return (-1);    // XXX -1 or EINVAL ?
 
+       error = devfs_get_cdevpriv((void **)&priv);
+       if (error == EBADF) {   /* called on fault, memory is initialized */
+               ND(5, "handling fault at ofs 0x%x", offset);
+               error = 0;
+       } else if (error == 0)  /* make sure memory is set */
+               error = netmap_get_memory(priv);
+       if (error)
+               return (error);
+
        ND("request for offset 0x%x", (uint32_t)offset);
        *paddr = netmap_ofstophys(offset);
 
-       return (0);
+       return (*paddr ? 0 : ENOMEM);
 }
 
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+       if (netmap_verbose)
+               D("dev %p fflag 0x%x devtype %d td %p",
+                       dev, fflag, devtype, td);
+       return 0;
+}
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+       struct netmap_priv_d *priv;
+       int error;
+
+       priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+                             M_NOWAIT | M_ZERO);
+       if (priv == NULL)
+               return ENOMEM;
+
+       error = devfs_set_cdevpriv(priv, netmap_dtor);
+       if (error)
+               return error;
+
+       return 0;
+}
+#endif /* __FreeBSD__ */
+
 
 /*
  * Handlers for synchronization of the queues from/to the host.
- *
- * netmap_sync_to_host() passes packets up. We are called from a
- * system call in user process context, and the only contention
- * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ *   just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ *   (from the host or the NIC) are marked as NS_FORWARD upon
+ *   arrival, and the user application has a chance to reset the
+ *   flag for packets that should be dropped.
+ *   On the RXSYNC or poll(), packets in RX rings between
+ *   kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
+ *   to the other side.
+ * The transfer NIC --> host is relatively easy, just encapsulate
+ * into mbufs and we are done. The host --> NIC side is slightly
+ * harder because there might not be room in the tx ring so it
+ * might take a while before releasing the buffer.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
  */
 static void
-netmap_sync_to_host(struct netmap_adapter *na)
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
 {
-       struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
-       struct netmap_ring *ring = kring->ring;
-       struct mbuf *head = NULL, *tail = NULL, *m;
-       u_int k, n, lim = kring->nkr_num_slots - 1;
+       struct mbuf *m;
 
-       k = ring->cur;
-       if (k > lim) {
-               netmap_ring_reinit(kring);
-               return;
+       /* send packets up, outside the lock */
+       while ((m = head) != NULL) {
+               head = head->m_nextpkt;
+               m->m_nextpkt = NULL;
+               if (netmap_verbose & NM_VERB_HOST)
+                       D("sending up pkt %p size %d", m, MBUF_LEN(m));
+               NM_SEND_UP(dst, m);
        }
-       // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+}
 
-       /* Take packets from hwcur to cur and pass them up.
+struct mbq {
+       struct mbuf *head;
+       struct mbuf *tail;
+       int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+       /* Take packets from hwcur to cur-reserved and pass them up.
         * In case of no buffers we give up. At the end of the loop,
         * the queue is drained in all cases.
+        * XXX handle reserved
         */
+       int k = kring->ring->cur - kring->ring->reserved;
+       u_int n, lim = kring->nkr_num_slots - 1;
+       struct mbuf *m, *tail = q->tail;
+
+       if (k < 0)
+               k = k + kring->nkr_num_slots;
        for (n = kring->nr_hwcur; n != k;) {
-               struct netmap_slot *slot = &ring->slot[n];
+               struct netmap_slot *slot = &kring->ring->slot[n];
 
                n = (n == lim) ? 0 : n + 1;
+               if ((slot->flags & NS_FORWARD) == 0 && !force)
+                       continue;
                if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
                        D("bad pkt at %d len %d", n, slot->len);
                        continue;
                }
-               m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
+               slot->flags &= ~NS_FORWARD; // XXX needed ?
+               m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
 
                if (m == NULL)
                        break;
                if (tail)
                        tail->m_nextpkt = m;
                else
-                       head = m;
+                       q->head = m;
                tail = m;
+               q->count++;
                m->m_nextpkt = NULL;
        }
+       q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * increment cur and decrement avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+       struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+       struct netmap_kring *k1 = &na->tx_rings[0];
+       int i, howmany, src_lim, dst_lim;
+
+       howmany = kring->nr_hwavail;    /* XXX otherwise cur - reserved - nr_hwcur */
+
+       src_lim = kring->nkr_num_slots;
+       for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
+               ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+               dst_lim = k1->nkr_num_slots;
+               while (howmany > 0 && k1->ring->avail > 0) {
+                       struct netmap_slot *src, *dst, tmp;
+                       src = &kring->ring->slot[kring->nr_hwcur];
+                       dst = &k1->ring->slot[k1->ring->cur];
+                       tmp = *src;
+                       src->buf_idx = dst->buf_idx;
+                       src->flags = NS_BUF_CHANGED;
+
+                       dst->buf_idx = tmp.buf_idx;
+                       dst->len = tmp.len;
+                       dst->flags = NS_BUF_CHANGED;
+                       ND("out len %d buf %d from %d to %d",
+                               dst->len, dst->buf_idx,
+                               kring->nr_hwcur, k1->ring->cur);
+
+                       if (++kring->nr_hwcur >= src_lim)
+                               kring->nr_hwcur = 0;
+                       howmany--;
+                       kring->nr_hwavail--;
+                       if (++k1->ring->cur >= dst_lim)
+                               k1->ring->cur = 0;
+                       k1->ring->avail--;
+               }
+               kring->ring->cur = kring->nr_hwcur; // XXX
+               k1++;
+       }
+}
+
+/*
+ * netmap_sync_to_host() passes packets up. We are called from a
+ * system call in user process context, and the only contention
+ * can be among multiple user threads erroneously calling
+ * this routine concurrently.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+       struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+       struct netmap_ring *ring = kring->ring;
+       u_int k, lim = kring->nkr_num_slots - 1;
+       struct mbq q = { NULL, NULL };
+
+       k = ring->cur;
+       if (k > lim) {
+               netmap_ring_reinit(kring);
+               return;
+       }
+       // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+
+       /* Take packets from hwcur to cur and pass them up.
+        * In case of no buffers we give up. At the end of the loop,
+        * the queue is drained in all cases.
+        */
+       netmap_grab_packets(kring, &q, 1);
        kring->nr_hwcur = k;
        kring->nr_hwavail = ring->avail = lim;
        // na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
 
-       /* send packets up, outside the lock */
-       while ((m = head) != NULL) {
-               head = head->m_nextpkt;
-               m->m_nextpkt = NULL;
-               if (netmap_verbose & NM_VERB_HOST)
-                       D("sending up pkt %p size %d", m, MBUF_LEN(m));
-               NM_SEND_UP(na->ifp, m);
-       }
+       netmap_send_up(na->ifp, q.head);
 }
 
 /*
@@ -323,15 +822,19 @@ netmap_sync_to_host(struct netmap_adapte
  *
  * This routine also does the selrecord if called from the poll handler
  * (we know because td != NULL).
+ *
+ * NOTE: on linux, selrecord() is defined as a macro and uses pwait
+ *     as an additional hidden argument.
  */
 static void
-netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
+netmap_sync_from_host(struct netmap_adapter *na, struct thread *td, void 
*pwait)
 {
        struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
        struct netmap_ring *ring = kring->ring;
        u_int j, n, lim = kring->nkr_num_slots;
        u_int k = ring->cur, resvd = ring->reserved;
 
+       (void)pwait;    /* disable unused warnings */
        na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
        if (k >= lim) {
                netmap_ring_reinit(kring);
@@ -370,15 +873,73 @@ netmap_sync_from_host(struct netmap_adap
 static int
 get_ifp(const char *name, struct ifnet **ifp)
 {
+#ifdef NM_BRIDGE
+       struct ifnet *iter = NULL;
+
+       do {
+               struct nm_bridge *b;
+               int i, l, cand = -1;
+
+               if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1))
+                       break;
+               b = nm_find_bridge(name);
+               if (b == NULL) {
+                       D("no bridges available for '%s'", name);
+                       return (ENXIO);
+               }
+               /* XXX locking */
+               BDG_LOCK(b);
+               /* lookup in the local list of ports */
+               for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+                       iter = b->bdg_ports[i];
+                       if (iter == NULL) {
+                               if (cand == -1)
+                                       cand = i; /* potential insert point */
+                               continue;
+                       }
+                       if (!strcmp(iter->if_xname, name)) {
+                               ADD_BDG_REF(iter);
+                               ND("found existing interface");
+                               BDG_UNLOCK(b);
+                               break;
+                       }
+               }
+               if (i < NM_BDG_MAXPORTS) /* already unlocked */
+                       break;
+               if (cand == -1) {
+                       D("bridge full, cannot create new port");
+no_port:
+                       BDG_UNLOCK(b);
+                       *ifp = NULL;
+                       return EINVAL;
+               }
+               ND("create new bridge port %s", name);
+               /* space for forwarding list after the ifnet */
+               l = sizeof(*iter) +
+                        sizeof(struct nm_bdg_fwd)*NM_BDG_BATCH ;
+               iter = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
+               if (!iter)
+                       goto no_port;
+               strcpy(iter->if_xname, name);
+               bdg_netmap_attach(iter);
+               b->bdg_ports[cand] = iter;
+               iter->if_bridge = b;
+               ADD_BDG_REF(iter);
+               BDG_UNLOCK(b);
+               ND("attaching virtual bridge %p", b);
+       } while (0);
+       *ifp = iter;
+       if (! *ifp)
+#endif /* NM_BRIDGE */
        *ifp = ifunit_ref(name);
        if (*ifp == NULL)
                return (ENXIO);
        /* can do this if the capability exists and if_pspare[0]
         * points to the netmap descriptor.
         */
-       if ((*ifp)->if_capabilities & IFCAP_NETMAP && NA(*ifp))
+       if (NETMAP_CAPABLE(*ifp))
                return 0;       /* valid pointer, we hold the refcount */
-       if_rele(*ifp);
+       nm_if_rele(*ifp);
        return EINVAL;  // not NETMAP capable
 }
 
@@ -402,7 +963,7 @@ netmap_ring_reinit(struct netmap_kring *
        u_int i, lim = kring->nkr_num_slots - 1;
        int errors = 0;
 
-       D("called for %s", kring->na->ifp->if_xname);
+       RD(10, "called for %s", kring->na->ifp->if_xname);
        if (ring->cur > lim)
                errors++;
        for (i = 0; i <= lim; i++) {
@@ -424,9 +985,9 @@ netmap_ring_reinit(struct netmap_kring *
                int pos = kring - kring->na->tx_rings;
                int n = kring->na->num_tx_rings + 1;
 
-               D("total %d errors", errors);
+               RD(10, "total %d errors", errors);
                errors++;
-               D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
+               RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
                        kring->na->ifp->if_xname,
                        pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
                        ring->cur, kring->nr_hwcur,
@@ -474,6 +1035,7 @@ netmap_set_ringid(struct netmap_priv_d *
        priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
        if (need_lock)
                na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+    if (netmap_verbose) {
        if (ringid & NETMAP_SW_RING)
                D("ringid %s set to SW RING", ifp->if_xname);
        else if (ringid & NETMAP_HW_RING)
@@ -481,6 +1043,7 @@ netmap_set_ringid(struct netmap_priv_d *
                        priv->np_qfirst);
        else
                D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
+    }
        return 0;
 }
 
@@ -498,8 +1061,8 @@ netmap_set_ringid(struct netmap_priv_d *
  * Return 0 on success, errno otherwise.
  */
 static int
-netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-stable-9@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "svn-src-stable-9-unsubscr...@freebsd.org"

Reply via email to