Author: vmaffione
Date: Tue Oct 30 08:36:36 2018
New Revision: 339906
URL: https://svnweb.freebsd.org/changeset/base/339906

Log:
  MFC r339639:
  
  netmap: align codebase to the current upstream (sha 8374e1a7e6941)
  
  Changelist:
      - Move large parts of VALE code to a new file and header netmap_bdg.[ch].
        This is useful to reuse the code within upcoming projects.
      - Improvements and bug fixes to pipes and monitors.
      - Introduce nm_os_onattach(), nm_os_onenter() and nm_os_onexit() to
        handle differences between FreeBSD and Linux.
      - Introduce some new helper functions to handle more host rings and fake
        rings (netmap_all_rings(), netmap_real_rings(), ...)
      - Added new sysctl to enable/disable hw checksum in emulated netmap mode.
      - nm_inject: add support for NS_MOREFRAG
  
  Approved by: re (gjb)

Added:
  stable/12/sys/dev/netmap/netmap_bdg.c
     - copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.c
  stable/12/sys/dev/netmap/netmap_bdg.h
     - copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.h
Modified:
  stable/12/sys/conf/files
  stable/12/sys/dev/netmap/netmap.c
  stable/12/sys/dev/netmap/netmap_freebsd.c
  stable/12/sys/dev/netmap/netmap_generic.c
  stable/12/sys/dev/netmap/netmap_kern.h
  stable/12/sys/dev/netmap/netmap_mem2.c
  stable/12/sys/dev/netmap/netmap_monitor.c
  stable/12/sys/dev/netmap/netmap_pipe.c
  stable/12/sys/dev/netmap/netmap_vale.c
  stable/12/sys/net/netmap.h
  stable/12/sys/net/netmap_user.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/conf/files
==============================================================================
--- stable/12/sys/conf/files    Tue Oct 30 05:04:18 2018        (r339905)
+++ stable/12/sys/conf/files    Tue Oct 30 08:36:36 2018        (r339906)
@@ -2544,6 +2544,7 @@ dev/netmap/netmap_pipe.c  optional netmap
 dev/netmap/netmap_pt.c         optional netmap
 dev/netmap/netmap_vale.c       optional netmap
 dev/netmap/netmap_legacy.c     optional netmap
+dev/netmap/netmap_bdg.c                optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nfsmb/nfsmb.c              optional nfsmb pci
 dev/nge/if_nge.c               optional nge

Modified: stable/12/sys/dev/netmap/netmap.c
==============================================================================
--- stable/12/sys/dev/netmap/netmap.c   Tue Oct 30 05:04:18 2018        
(r339905)
+++ stable/12/sys/dev/netmap/netmap.c   Tue Oct 30 08:36:36 2018        
(r339906)
@@ -521,6 +521,9 @@ int netmap_generic_txqdisc = 1;
 int netmap_generic_ringsize = 1024;
 int netmap_generic_rings = 1;
 
+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
 /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
 int ptnet_vnet_hdr = 1;
 
@@ -549,6 +552,9 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &ne
 SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
                "Adapter mode. 0 selects the best option available,"
                "1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, 
&netmap_generic_hwcsum,
+               0, "Hardware checksums. 0 to disable checksum generation by the 
NIC (default),"
+               "1 to enable checksum generation by the NIC");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
                0, "RX notification interval in nanoseconds");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
@@ -827,8 +833,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int 
        }
 
        /* account for the (possibly fake) host rings */
-       n[NR_TX] = na->num_tx_rings + 1;
-       n[NR_RX] = na->num_rx_rings + 1;
+       n[NR_TX] = netmap_all_rings(na, NR_TX);
+       n[NR_RX] = netmap_all_rings(na, NR_RX);
 
        len = (n[NR_TX] + n[NR_RX]) *
                (sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
@@ -930,11 +936,14 @@ netmap_krings_delete(struct netmap_adapter *na)
 void
 netmap_hw_krings_delete(struct netmap_adapter *na)
 {
-       struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+       u_int lim = netmap_real_rings(na, NR_RX), i;
 
-       ND("destroy sw mbq with len %d", mbq_len(q));
-       mbq_purge(q);
-       mbq_safe_fini(q);
+       for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+               struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+               ND("destroy sw mbq with len %d", mbq_len(q));
+               mbq_purge(q);
+               mbq_safe_fini(q);
+       }
        netmap_krings_delete(na);
 }
 
@@ -1535,7 +1544,7 @@ netmap_get_na(struct nmreq_header *hdr,
                goto out;
 
        /* try to see if this is a bridge port */
-       error = netmap_get_bdg_na(hdr, na, nmd, create);
+       error = netmap_get_vale_na(hdr, na, nmd, create);
        if (error)
                goto out;
 
@@ -1827,7 +1836,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint3
                        }
                        priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
                                nma_get_nrings(na, t) : 0);
-                       priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+                       priv->np_qlast[t] = netmap_all_rings(na, t);
                        ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : 
"NIC+SW",
                                nm_txrx2str(t),
                                priv->np_qfirst[t], priv->np_qlast[t]);
@@ -2543,7 +2552,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, c
                        NMG_LOCK();
                        hdr->nr_reqtype = NETMAP_REQ_REGISTER;
                        hdr->nr_body = (uintptr_t)&regreq;
-                       error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+                       error = netmap_get_vale_na(hdr, &na, NULL, 0);
                        hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
                        hdr->nr_body = (uintptr_t)req;
                        if (na && !error) {
@@ -3336,6 +3345,12 @@ netmap_attach_common(struct netmap_adapter *na)
        }
        na->pdev = na; /* make sure netmap_mem_map() is called */
 #endif /* __FreeBSD__ */
+       if (na->na_flags & NAF_HOST_RINGS) {
+               if (na->num_host_rx_rings == 0)
+                       na->num_host_rx_rings = 1;
+               if (na->num_host_tx_rings == 0)
+                       na->num_host_tx_rings = 1;
+       }
        if (na->nm_krings_create == NULL) {
                /* we assume that we have been called by a driver,
                 * since other port types all provide their own
@@ -3357,7 +3372,7 @@ netmap_attach_common(struct netmap_adapter *na)
                /* no special nm_bdg_attach callback. On VALE
                 * attach, we need to interpose a bwrap
                 */
-               na->nm_bdg_attach = netmap_bwrap_attach;
+               na->nm_bdg_attach = netmap_default_bdg_attach;
 #endif
 
        return 0;
@@ -3399,10 +3414,10 @@ out:
 static void
 netmap_hw_dtor(struct netmap_adapter *na)
 {
-       if (nm_iszombie(na) || na->ifp == NULL)
+       if (na->ifp == NULL)
                return;
 
-       WNA(na->ifp) = NULL;
+       NM_DETACH_NA(na->ifp);
 }
 
 
@@ -3426,10 +3441,10 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
        }
 
        if (arg == NULL || arg->ifp == NULL)
-               goto fail;
+               return EINVAL;
 
        ifp = arg->ifp;
-       if (NA(ifp) && !NM_NA_VALID(ifp)) {
+       if (NM_NA_CLASH(ifp)) {
                /* If NA(ifp) is not null but there is no valid netmap
                 * adapter it means that someone else is using the same
                 * pointer (e.g. ax25_ptr on linux). This happens for
@@ -3456,28 +3471,8 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
 
        NM_ATTACH_NA(ifp, &hwna->up);
 
-#ifdef linux
-       if (ifp->netdev_ops) {
-               /* prepare a clone of the netdev ops */
-#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
-               hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
-#else
-               hwna->nm_ndo = *ifp->netdev_ops;
-#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
-       }
-       hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
-       hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
-       if (ifp->ethtool_ops) {
-               hwna->nm_eto = *ifp->ethtool_ops;
-       }
-       hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
-       hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
-       if (arg->nm_config == NULL) {
-               hwna->up.nm_config = netmap_linux_config;
-       }
-#endif /* linux */
+       nm_os_onattach(ifp);
+
        if (arg->nm_dtor == NULL) {
                hwna->up.nm_dtor = netmap_hw_dtor;
        }
@@ -3545,7 +3540,10 @@ netmap_hw_krings_create(struct netmap_adapter *na)
        int ret = netmap_krings_create(na, 0);
        if (ret == 0) {
                /* initialize the mbq for the sw rx ring */
-               mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
+               u_int lim = netmap_real_rings(na, NR_RX), i;
+               for (i = na->num_rx_rings; i < lim; i++) {
+                       mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
+               }
                ND("initialized sw rx queue %d", na->num_rx_rings);
        }
        return ret;
@@ -3608,8 +3606,14 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
        unsigned int txr;
        struct mbq *q;
        int busy;
+       u_int i;
 
-       kring = na->rx_rings[na->num_rx_rings];
+       i = MBUF_TXQ(m);
+       if (i >= na->num_host_rx_rings) {
+               i = i % na->num_host_rx_rings;
+       }
+       kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
+
        // XXX [Linux] we do not need this lock
        // if we follow the down/configure/up protocol -gl
        // mtx_lock(&na->core_lock);
@@ -3639,8 +3643,15 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
                goto done;
        }
 
-       if (nm_os_mbuf_has_offld(m)) {
-               RD(1, "%s drop mbuf that needs offloadings", na->name);
+       if (!netmap_generic_hwcsum) {
+               if (nm_os_mbuf_has_csum_offld(m)) {
+                       RD(1, "%s drop mbuf that needs checksum offload", 
na->name);
+                       goto done;
+               }
+       }
+
+       if (nm_os_mbuf_has_seg_offld(m)) {
+               RD(1, "%s drop mbuf that needs generic segmentation offload", 
na->name);
                goto done;
        }
 
@@ -3843,6 +3854,40 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_
        }
 
        return netmap_common_irq(na, q, work_done);
+}
+
+/* set/clear native flags and if_transmit/netdev_ops
+ *
+ * nm_set_native_flags() sets NAF_NETMAP_ON in na->na_flags, then calls
+ * the OS-specific nm_os_onenter() hook on na->ifp and finally
+ * nm_update_hostrings_mode(na). It returns without changing anything
+ * when na->active_fds is already positive.
+ */
+void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+       struct ifnet *ifp = na->ifp;
+
+       /* We do the setup for intercepting packets only if we are the
+        * first user of this adapter. */
+       if (na->active_fds > 0) {
+               return;
+       }
+
+       na->na_flags |= NAF_NETMAP_ON; /* set before the OS hook runs */
+       nm_os_onenter(ifp);
+       nm_update_hostrings_mode(na);
+}
+
+void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+       struct ifnet *ifp = na->ifp;
+
+       /* We undo the setup for intercepting packets only if we are the
+        * last user of this adapter: while na->active_fds is still
+        * positive nothing is changed. Otherwise
+        * nm_update_hostrings_mode() and the OS-specific nm_os_onexit()
+        * hook run first, and NAF_NETMAP_ON is cleared last. */
+       if (na->active_fds > 0) {
+               return;
+       }
+
+       nm_update_hostrings_mode(na);
+       nm_os_onexit(ifp);
+
+       na->na_flags &= ~NAF_NETMAP_ON;
 }
 
 

Copied: stable/12/sys/dev/netmap/netmap_bdg.c (from r339639, 
head/sys/dev/netmap/netmap_bdg.c)
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ stable/12/sys/dev/netmap/netmap_bdg.c       Tue Oct 30 08:36:36 2018        
(r339906, copy of r339639, head/sys/dev/netmap/netmap_bdg.c)
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h>        /* types used in module initialization */
+#include <sys/conf.h>  /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h>     /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h>           /* BIOCIMMEDIATE */
+#include <machine/bus.h>       /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+/* Return the base name of the bridge that port vp is attached to,
+ * or NULL when vp->na_bdg is NULL (the port has no bridge). */
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+       struct nm_bridge *bridge = vp->na_bdg;
+
+       return (bridge != NULL) ? bridge->bdg_basename : NULL;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+/* Return 1 if c may appear in a bridge name, i.e. it is an ASCII
+ * letter, an ASCII digit or '_'; return 0 otherwise. */
+static int
+nm_is_id_char(const char c)
+{
+       if (c == '_')
+               return 1;
+       if (c >= '0' && c <= '9')
+               return 1;
+       if (c >= 'A' && c <= 'Z')
+               return 1;
+       return c >= 'a' && c <= 'z';
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character.
+ *
+ * Returns -1 when name is NULL, is shorter than NM_BDG_NAME, contains
+ * a character rejected by nm_is_id_char() before the ":", has no ":"
+ * within its first NM_BDG_IFNAMSIZ characters, or has more than
+ * IFNAMSIZ characters counting from the ":" to the end. */
+static int
+nm_vale_name_validate(const char *name)
+{
+       size_t total;
+       int pos;
+
+       if (name == NULL)
+               return -1;
+       total = strlen(name);
+       if (total < strlen(NM_BDG_NAME))
+               return -1;
+
+       /* scan the bridge part, stopping at the separator */
+       for (pos = 0; pos < NM_BDG_IFNAMSIZ && name[pos] != '\0'; pos++) {
+               if (name[pos] == ':')
+                       break;
+               if (!nm_is_id_char(name[pos]))
+                       return -1;
+       }
+       if (pos >= NM_BDG_IFNAMSIZ || name[pos] != ':') {
+               /* no separator found */
+               return -1;
+       }
+
+       if (total - pos > IFNAMSIZ) {
+               /* interface name too long */
+               return -1;
+       }
+
+       return pos;
+}
+
+/*
+ * locate a bridge among the existing ones, optionally creating it.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name; names are checked with
+ * nm_vale_name_validate(), which also yields the bridge name length.
+ *
+ * name   - port name, "<bridge>:<port>".
+ * create - when non-zero and no bridge with that name exists, the first
+ *          free slot (if any) is initialized as a new bridge.
+ * ops    - stored as bdg_ops of a newly created bridge; ignored when an
+ *          existing bridge is returned.
+ *
+ * Returns the bridge, or NULL if the name is invalid, no matching
+ * bridge exists and none could be created, or the hash table
+ * allocation failed.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+       int i, namelen;
+       struct nm_bridge *b = NULL, *bridges;
+       u_int num_bridges;
+
+       NMG_LOCK_ASSERT();
+
+       netmap_bns_getbridges(&bridges, &num_bridges);
+
+       namelen = nm_vale_name_validate(name);
+       if (namelen < 0) {
+               /* name may be NULL here: never hand NULL to %s */
+               D("invalid bridge name %s", name ? name : "(null)");
+               return NULL;
+       }
+
+       /* lookup the name, remember empty slot if there is one */
+       for (i = 0; i < num_bridges; i++) {
+               struct nm_bridge *x = bridges + i;
+
+               if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+                       if (create && b == NULL)
+                               b = x;  /* record empty slot */
+               } else if (x->bdg_namelen != namelen) {
+                       continue;
+               } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+                       ND("found '%.*s' at %d", namelen, name, i);
+                       b = x;
+                       break;
+               }
+       }
+       if (i == num_bridges && b) { /* name not found, can create entry */
+               /* initialize the bridge */
+               ND("create new bridge %s with ports %d", b->bdg_basename,
+                       b->bdg_active_ports);
+               b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+               if (b->ht == NULL) {
+                       D("failed to allocate hash table");
+                       return NULL;
+               }
+               strncpy(b->bdg_basename, name, namelen);
+               /* strncpy() does not terminate the copy, and a reused
+                * slot may still hold a longer previous name.
+                * namelen < NM_BDG_IFNAMSIZ is guaranteed by
+                * nm_vale_name_validate(); this assumes bdg_basename
+                * holds at least NM_BDG_IFNAMSIZ bytes - TODO confirm
+                * against struct nm_bridge. */
+               b->bdg_basename[namelen] = '\0';
+               b->bdg_namelen = namelen;
+               b->bdg_active_ports = 0;
+               for (i = 0; i < NM_BDG_MAXPORTS; i++)
+                       b->bdg_port_index[i] = i;
+               /* set the default function */
+               b->bdg_ops = ops;
+               b->private_data = b->ht;
+               b->bdg_flags = 0;
+               NM_BNS_GET(b);
+       }
+       return b;
+}
+
+
+/* Mark bridge b as free and release its hash table.
+ * Returns EBUSY, without touching the bridge, while NM_BDG_ACTIVE is
+ * set or the bridge still has active ports; returns 0 otherwise. */
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+       if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+               return EBUSY;
+       }
+
+       ND("marking bridge %s as free", b->bdg_basename);
+       nm_os_free(b->ht);
+       /* the slot is reused by nm_find_bridge(): do not leave a
+        * dangling pointer behind */
+       b->ht = NULL;
+       b->bdg_ops = NULL;
+       b->bdg_flags = 0;
+       NM_BNS_PUT(b);
+       return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ *
+ * The list of active ports is edited in the scratch copy
+ * b->tmp_bdg_port_index and copied back under BDG_WLOCK(). Under the
+ * same lock the bridge's dtor callback (if set) is run on the hw port
+ * and the bdg_ports[] entries are cleared. netmap_bdg_free() is called
+ * at the end and its return value is ignored.
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+       int s_hw = hw, s_sw = sw; /* saved: hw and sw are reset to -1 once found */
+       int i, lim =b->bdg_active_ports;
+       uint32_t *tmp = b->tmp_bdg_port_index;
+
+       /*
+       New algorithm:
+       make a copy of bdg_port_index;
+       lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+       in the array of bdg_port_index, replacing them with
+       entries from the bottom of the array;
+       decrement bdg_active_ports;
+       acquire BDG_WLOCK() and copy back the array.
+        */
+
+       if (netmap_verbose)
+               D("detach %d and %d (lim %d)", hw, sw, lim);
+       /* make a copy of the list of active ports, update it,
+        * and then copy back within BDG_WLOCK().
+        */
+       memcpy(b->tmp_bdg_port_index, b->bdg_port_index, 
sizeof(b->tmp_bdg_port_index));
+       for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+               if (hw >= 0 && tmp[i] == hw) {
+                       ND("detach hw %d at %d", hw, i);
+                       lim--; /* point to last active port */
+                       tmp[i] = tmp[lim]; /* swap with i */
+                       tmp[lim] = hw;  /* now this is inactive */
+                       hw = -1;
+               } else if (sw >= 0 && tmp[i] == sw) {
+                       ND("detach sw %d at %d", sw, i);
+                       lim--;
+                       tmp[i] = tmp[lim];
+                       tmp[lim] = sw;
+                       sw = -1;
+               } else {
+                       i++; /* advance only when nothing was swapped into slot i */
+               }
+       }
+       if (hw >= 0 || sw >= 0) {
+               D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+       }
+
+       BDG_WLOCK(b);
+       if (b->bdg_ops->dtor)
+               b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+       b->bdg_ports[s_hw] = NULL;
+       if (s_sw >= 0) {
+               b->bdg_ports[s_sw] = NULL;
+       }
+       memcpy(b->bdg_port_index, b->tmp_bdg_port_index, 
sizeof(b->tmp_bdg_port_index));
+       b->bdg_active_ports = lim;
+       BDG_WUNLOCK(b);
+
+       ND("now %d active ports", lim);
+       netmap_bdg_free(b);
+}
+
+
+/* nm_bdg_ctl callback for VALE ports.
+ *
+ * NETMAP_REQ_VALE_ATTACH needs no work here. For any other request the
+ * port is detached from its bridge (if it has one) with all rings
+ * disabled around the detach, and the adapter reference is dropped.
+ * Always returns 0. */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+       struct netmap_vp_adapter *port = (struct netmap_vp_adapter *)na;
+       struct nm_bridge *bridge = port->na_bdg;
+
+       if (hdr->nr_reqtype != NETMAP_REQ_VALE_ATTACH) {
+               if (bridge != NULL) {
+                       netmap_set_all_rings(na, 0 /* disable */);
+                       netmap_bdg_detach_common(bridge, port->bdg_port, -1);
+                       port->na_bdg = NULL;
+                       netmap_set_all_rings(na, 1 /* enable */);
+               }
+               /* this reference was taken just for the attach */
+               netmap_adapter_put(na);
+       }
+       return 0;
+}
+
+/* Default nm_bdg_attach callback: does no work of its own and returns
+ * NM_NEED_BWRAP unconditionally; all three arguments are unused. */
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+               struct nm_bridge *b)
+{
+       (void)name;
+       (void)na;
+       (void)b;
+
+       return NM_NEED_BWRAP;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ *
+ * hdr    - request header; hdr->nr_name is the port name and
+ *          hdr->nr_reqtype selects which kinds of port are acceptable.
+ * na     - out: the adapter, or NULL.
+ * nmd    - memory allocator passed on to vp_create()/netmap_get_hw_na().
+ * create - non-zero to create the bridge and/or port when missing.
+ * ops    - bridge operations; ops->name is the expected name prefix.
+ *
+ * Must be called with NMG_LOCK held.
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+       struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+       char *nr_name = hdr->nr_name;
+       const char *ifname;
+       struct ifnet *ifp = NULL;
+       int error = 0;
+       struct netmap_vp_adapter *vpna, *hostna = NULL;
+       struct nm_bridge *b;
+       uint32_t i, j;
+       uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+       int needed;
+
+       *na = NULL;     /* default return value */
+
+       /* first try to see if this is a bridge port. */
+       NMG_LOCK_ASSERT();
+       if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+               return 0;  /* no error, but no VALE prefix */
+       }
+
+       b = nm_find_bridge(nr_name, create, ops);
+       if (b == NULL) {
+               ND("no bridges available for '%s'", nr_name);
+               return (create ? ENOMEM : ENXIO);
+       }
+       if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+               panic("x");
+
+       /* Now we are sure that name starts with the bridge's name,
+        * lookup the port in the bridge. We need to scan the entire
+        * list. It is not important to hold a WLOCK on the bridge
+        * during the search because NMG_LOCK already guarantees
+        * that there are no other possible writers.
+        */
+
+       /* lookup in the local list of ports */
+       for (j = 0; j < b->bdg_active_ports; j++) {
+               i = b->bdg_port_index[j];
+               vpna = b->bdg_ports[i];
+               ND("checking %s", vpna->up.name);
+               if (!strcmp(vpna->up.name, nr_name)) {
+                       netmap_adapter_get(&vpna->up);
+                       /* the statement used to lack both its %d
+                        * argument and the terminating ';' */
+                       ND("found existing if %s refs %d", nr_name,
+                               vpna->up.na_refcount);
+                       *na = &vpna->up;
+                       return 0;
+               }
+       }
+       /* not found, should we create it? */
+       if (!create)
+               return ENXIO;
+       /* yes we should, see if we have space to attach entries */
+       needed = 2; /* in some cases we only need 1 */
+       if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+               D("bridge full %d, cannot create new port", b->bdg_active_ports);
+               return ENOMEM;
+       }
+       /* record the next two ports available, but do not allocate yet */
+       cand = b->bdg_port_index[b->bdg_active_ports];
+       cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+
+       /* the port name follows the bridge's name and the ':'; it is
+        * computed before the trace below so that ifname is never read
+        * uninitialized */
+       ifname = nr_name + b->bdg_namelen + 1;
+       ND("+++ bridge %s port %s used %d avail %d %d",
+               b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+       /*
+        * try see if there is a matching NIC with this name
+        * (after the bridge's name)
+        */
+       ifp = ifunit_ref(ifname);
+       if (!ifp) {
+               /* Create an ephemeral virtual port.
+                * This block contains all the ephemeral-specific logic.
+                */
+
+               if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+                       error = EINVAL;
+                       goto out;
+               }
+
+               /* bdg_netmap_attach creates a struct netmap_adapter */
+               error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+               if (error) {
+                       D("error %d", error);
+                       goto out;
+               }
+               /* shortcut - we can skip get_hw_na(),
+                * ownership check and nm_bdg_attach()
+                */
+
+       } else {
+               struct netmap_adapter *hw;
+
+               /* the vale:nic syntax is only valid for some commands */
+               switch (hdr->nr_reqtype) {
+               case NETMAP_REQ_VALE_ATTACH:
+               case NETMAP_REQ_VALE_DETACH:
+               case NETMAP_REQ_VALE_POLLING_ENABLE:
+               case NETMAP_REQ_VALE_POLLING_DISABLE:
+                       break; /* ok */
+               default:
+                       error = EINVAL;
+                       goto out;
+               }
+
+               error = netmap_get_hw_na(ifp, nmd, &hw);
+               if (error || hw == NULL)
+                       goto out;
+
+               /* host adapter might not be created */
+               error = hw->nm_bdg_attach(nr_name, hw, b);
+               if (error == NM_NEED_BWRAP) {
+                       error = b->bdg_ops->bwrap_attach(nr_name, hw);
+               }
+               if (error)
+                       goto out;
+               vpna = hw->na_vp;
+               hostna = hw->na_hostvp;
+               if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+                       /* Check if we need to skip the host rings. */
+                       struct nmreq_vale_attach *areq =
+                               (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+                       if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+                               hostna = NULL;
+                       }
+               }
+       }
+
+       BDG_WLOCK(b);
+       vpna->bdg_port = cand;
+       ND("NIC  %p to bridge port %d", vpna, cand);
+       /* bind the port to the bridge (virtual ports are not active) */
+       b->bdg_ports[cand] = vpna;
+       vpna->na_bdg = b;
+       b->bdg_active_ports++;
+       if (hostna != NULL) {
+               /* also bind the host stack to the bridge */
+               b->bdg_ports[cand2] = hostna;
+               hostna->bdg_port = cand2;
+               hostna->na_bdg = b;
+               b->bdg_active_ports++;
+               ND("host %p to bridge port %d", hostna, cand2);
+       }
+       ND("if %s refs %d", ifname, vpna->up.na_refcount);
+       BDG_WUNLOCK(b);
+       *na = &vpna->up;
+       netmap_adapter_get(*na);
+
+out:
+       /* drop the interface reference taken by ifunit_ref() */
+       if (ifp)
+               if_rele(ifp);
+
+       return error;
+}
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ *
+ * Runs entirely under NMG_LOCK. Fails with EACCES when the bridge named
+ * in the request exists and rejects auth_token, with EINVAL when
+ * req->reg.nr_mem_id names no memory allocator or the name carries no
+ * VALE prefix, and with EBUSY when the port is already attached or the
+ * adapter is owned by someone else. On success the bridge port index
+ * is stored in req->port_index and 0 is returned; the adapter reference
+ * obtained from netmap_get_vale_na() is kept.
+ *
+ * The allocator reference obtained from netmap_mem_find() is needed
+ * only for the duration of this call and is released on every path.
+ * NOTE(review): this relies on netmap_mem_find() returning a counted
+ * reference that is dropped with netmap_mem_put(), both expected from
+ * netmap_mem2.h - confirm.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+       struct nmreq_vale_attach *req =
+               (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+       struct netmap_vp_adapter * vpna;
+       struct netmap_adapter *na = NULL;
+       struct netmap_mem_d *nmd = NULL;
+       struct nm_bridge *b = NULL;
+       int error;
+
+       NMG_LOCK();
+       /* permission check for modified bridges */
+       b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+       if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+               error = EACCES;
+               goto unlock_exit;
+       }
+
+       if (req->reg.nr_mem_id) {
+               nmd = netmap_mem_find(req->reg.nr_mem_id);
+               if (nmd == NULL) {
+                       error = EINVAL;
+                       goto unlock_exit;
+               }
+       }
+
+       /* check for existing one */
+       error = netmap_get_vale_na(hdr, &na, nmd, 0);
+       if (na) {
+               error = EBUSY;
+               goto unref_exit;
+       }
+       error = netmap_get_vale_na(hdr, &na,
+                               nmd, 1 /* create if not exists */);
+       if (error) { /* no device */
+               goto unlock_exit;
+       }
+
+       if (na == NULL) { /* VALE prefix missing */
+               error = EINVAL;
+               goto unlock_exit;
+       }
+
+       if (NETMAP_OWNED_BY_ANY(na)) {
+               error = EBUSY;
+               goto unref_exit;
+       }
+
+       if (na->nm_bdg_ctl) {
+               /* nop for VALE ports. The bwrap needs to put the hwna
+                * in netmap mode (see netmap_bwrap_bdg_ctl)
+                */
+               error = na->nm_bdg_ctl(hdr, na);
+               if (error)
+                       goto unref_exit;
+               ND("registered %s to netmap-mode", na->name);
+       }
+       vpna = (struct netmap_vp_adapter *)na;
+       req->port_index = vpna->bdg_port;
+
+       /* success: only the lookup reference on the allocator goes away */
+       if (nmd)
+               netmap_mem_put(nmd);
+
+       NMG_UNLOCK();
+       return 0;
+
+unref_exit:
+       netmap_adapter_put(na);
+unlock_exit:
+       if (nmd)
+               netmap_mem_put(nmd);
+
+       NMG_UNLOCK();
+       return error;
+}
+
+/* Return non-zero when na is a bwrap adapter, recognized by its
+ * nm_register callback being netmap_bwrap_reg. */
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+       if (na->nm_register == netmap_bwrap_reg)
+               return 1;
+       return 0;
+}
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ *
+ * Runs entirely under NMG_LOCK. Fails with EACCES when the bridge named
+ * in the request exists and rejects auth_token, with EINVAL when the
+ * name carries no VALE prefix, and with EBUSY when the port is a bwrap
+ * with polling enabled or is not the adapter's current na_vp. Otherwise
+ * the port index is stored in the request body and the adapter's
+ * nm_bdg_ctl callback, if set, performs the detach and provides the
+ * return value.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+       struct nmreq_vale_detach *req = (void *)(uintptr_t)hdr->nr_body;
+       struct netmap_vp_adapter *port;
+       struct netmap_adapter *na;
+       struct nm_bridge *bridge;
+       int error;
+
+       NMG_LOCK();
+
+       /* permission check for modified bridges */
+       bridge = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+       if (bridge != NULL && !nm_bdg_valid_auth_token(bridge, auth_token)) {
+               error = EACCES;
+               goto unlock_exit;
+       }
+
+       error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+       if (error != 0) {
+               /* no device, or another bridge or user owns the device */
+               goto unlock_exit;
+       }
+       if (na == NULL) {
+               /* VALE prefix missing */
+               error = EINVAL;
+               goto unlock_exit;
+       }
+
+       if (nm_is_bwrap(na) &&
+           ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+               /* Don't detach a NIC with polling */
+               error = EBUSY;
+               goto unref_exit;
+       }
+
+       port = (struct netmap_vp_adapter *)na;
+       if (na->na_vp != port) {
+               /* trying to detach first attach of VALE persistent port
+                * attached to 2 bridges
+                */
+               error = EBUSY;
+               goto unref_exit;
+       }
+       req->port_index = port->bdg_port;
+
+       if (na->nm_bdg_ctl != NULL) {
+               /* remove the port from bridge. The bwrap
+                * also needs to put the hwna in normal mode
+                */
+               error = na->nm_bdg_ctl(hdr, na);
+       }
+
+unref_exit:
+       netmap_adapter_put(na);
+unlock_exit:
+       NMG_UNLOCK();
+       return error;
+}
+
+/* Per-worker state for bwrap polling. One entry exists for each polling
+ * kthread; it is handed to the kernel context as worker_private and
+ * read back by netmap_bwrap_polling().
+ */
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+       /* kernel context obtained from nm_os_kctx_create() */
+       struct nm_kctx *nmk;
+       /* first hw RX ring index polled by this worker */
+       u_int qfirst;
+       /* one past the last hw RX ring index polled by this worker */
+       u_int qlast;
+       /* back pointer to the polling state that owns this entry */
+       struct nm_bdg_polling_state *bps;
+};
+
+/* Polling configuration and worker bookkeeping for one bwrap adapter. */
+struct nm_bdg_polling_state {
+       /* NOTE(review): not referenced in the code visible here;
+        * presumably set once polling has been configured - confirm.
+        */
+       bool configured;
+       /* cleared by nm_bdg_polling_start_kthreads() before starting */
+       bool stopped;
+       /* bwrap adapter whose hwna RX rings are polled */
+       struct netmap_bwrap_adapter *bna;
+       /* polling mode, compared with NETMAP_POLLING_MODE_SINGLE_CPU */
+       uint32_t mode;
+       /* ring range [qfirst, qlast) given to the worker when a single
+        * kthread is used in single-CPU mode
+        */
+       u_int qfirst;
+       u_int qlast;
+       /* worker i is given affinity cpu_from + i */
+       u_int cpu_from;
+       /* number of workers, i.e. number of entries in kthreads[] */
+       u_int ncpus;
+       /* array of ncpus entries, allocated by nm_bdg_create_kthreads() */
+       struct nm_bdg_kthread *kthreads;
+};
+
+/* Worker function of a polling kthread.
+ *
+ * @data is the struct nm_bdg_kthread of the calling worker (a NULL
+ * pointer makes the call a no-op). Every hw RX ring in the range
+ * [qfirst, qlast) assigned to the worker gets its nm_notify callback
+ * invoked with flags 0. @is_kthread is not used.
+ */
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+       struct nm_bdg_kthread *worker = data;
+       struct netmap_kring **rx_rings;
+       u_int ring, end;
+
+       if (worker == NULL)
+               return;
+
+       /* snapshot the ring range before walking it */
+       ring = worker->qfirst;
+       end = worker->qlast;
+       rx_rings = NMR(worker->bps->bna->hwna, NR_RX);
+
+       while (ring < end) {
+               struct netmap_kring *k = rx_rings[ring];
+
+               k->nm_notify(k, 0);
+               ring++;
+       }
+}
+
+/* Allocate the worker array of @bps and create (without starting) one
+ * kernel context per worker.
+ *
+ * bps->ncpus entries are allocated. Worker i gets affinity
+ * bps->cpu_from + i. When there is a single worker and the mode is
+ * NETMAP_POLLING_MODE_SINGLE_CPU that worker serves the whole range
+ * [bps->qfirst, bps->qlast); otherwise each worker serves only the
+ * ring whose index equals its affinity.
+ *
+ * Returns 0 on success, ENOMEM if the array cannot be allocated, or
+ * EFAULT if a kernel context cannot be created. On EFAULT every
+ * context created so far is destroyed, the array is freed and
+ * bps->kthreads is reset to NULL.
+ */
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+       struct nm_kctx_cfg kcfg;
+       int i, j;
+
+       bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) *
+                       bps->ncpus);
+       if (bps->kthreads == NULL)
+               return ENOMEM;
+
+       bzero(&kcfg, sizeof(kcfg));
+       kcfg.worker_fn = netmap_bwrap_polling;
+       kcfg.use_kthread = 1;
+       for (i = 0; i < bps->ncpus; i++) {
+               struct nm_bdg_kthread *t = bps->kthreads + i;
+               int all = (bps->ncpus == 1 &&
+                       bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+               int affinity = bps->cpu_from + i;
+
+               t->bps = bps;
+               t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+               t->qlast = all ? bps->qlast : t->qfirst + 1;
+               D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+                       t->qlast);
+
+               kcfg.type = i;
+               kcfg.worker_private = t;
+               t->nmk = nm_os_kctx_create(&kcfg, NULL);
+               if (t->nmk == NULL) {
+                       goto cleanup;
+               }
+               nm_os_kctx_worker_setaff(t->nmk, affinity);
+       }
+       return 0;
+
+cleanup:
+       /* Entry i is the one that failed: only entries [0, i) hold a
+        * context. Index with j, not i, so each of them is destroyed
+        * exactly once and none is leaked.
+        */
+       for (j = 0; j < i; j++) {
+               struct nm_bdg_kthread *t = bps->kthreads + j;
+               nm_os_kctx_destroy(t->nmk);
+       }
+       nm_os_free(bps->kthreads);
+       /* do not leave a dangling pointer behind for the caller */
+       bps->kthreads = NULL;
+       return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+       int error, i, j;
+
+       if (!bps) {
+               D("polling is not configured");
+               return EFAULT;
+       }
+       bps->stopped = false;
+
+       for (i = 0; i < bps->ncpus; i++) {
+               struct nm_bdg_kthread *t = bps->kthreads + i;
+               error = nm_os_kctx_worker_start(t->nmk);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to