The diff below is what will be committed, or something similar, once all the issues with the NET_LOCK() are fixed. With it, the IPv4 forwarding path is now executed in the 'softnet' thread, without holding the KERNEL_LOCK().
This has already been tested by Hrvoje Popovski who measured an improvement from 1.42Mpps to 1.7Mpps with it. He also confirmed this improves the overall latency of the machine. His ssh sessions are now responsive while forwarding that much traffic. I'd appreciate it if you could test this diff and report regressions. This cannot be tested if you're using NFS, pflow(4) or BFD. But I'd like to know how the rest behaves. I'm not asking for OKs. The goal here is to find bugs early. Index: dev/usb/if_umb.c =================================================================== RCS file: /cvs/src/sys/dev/usb/if_umb.c,v retrieving revision 1.9 diff -u -p -r1.9 if_umb.c --- dev/usb/if_umb.c 22 Jan 2017 10:17:39 -0000 1.9 +++ dev/usb/if_umb.c 22 Feb 2017 14:52:27 -0000 @@ -41,6 +41,7 @@ #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/ip_var.h> #include <machine/bus.h> @@ -753,7 +754,6 @@ umb_output(struct ifnet *ifp, struct mbu int umb_input(struct ifnet *ifp, struct mbuf *m, void *cookie) { - struct niqueue *inq; uint8_t ipv; if ((ifp->if_flags & IFF_UP) == 0) { @@ -774,11 +774,11 @@ umb_input(struct ifnet *ifp, struct mbuf ifp->if_ibytes += m->m_pkthdr.len; switch (ipv) { case 4: - inq = &ipintrq; - break; + ipv4_input(ifp, m); + return 1; #ifdef INET6 case 6: - inq = &ip6intrq; + niq_enqueue(&ip6intrq, m); break; #endif /* INET6 */ default: @@ -788,7 +788,6 @@ umb_input(struct ifnet *ifp, struct mbuf m_freem(m); return 1; } - niq_enqueue(inq, m); return 1; } Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.242 diff -u -p -r1.242 if_ethersubr.c --- net/if_ethersubr.c 24 Jan 2017 10:08:30 -0000 1.242 +++ net/if_ethersubr.c 22 Feb 2017 14:53:38 -0000 @@ -96,6 +96,7 @@ didn't get a copy, you may request one f #include <net/if_types.h> #include <netinet/in.h> +#include <netinet/ip_var.h> #include <netinet/if_ether.h> #include 
<netinet/ip_ipsp.h> @@ -374,8 +375,8 @@ ether_input(struct ifnet *ifp, struct mb decapsulate: switch (etype) { case ETHERTYPE_IP: - inq = &ipintrq; - break; + ipv4_input(ifp, m); + return (1); case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) Index: net/if_spppsubr.c =================================================================== RCS file: /cvs/src/sys/net/if_spppsubr.c,v retrieving revision 1.162 diff -u -p -r1.162 if_spppsubr.c --- net/if_spppsubr.c 24 Jan 2017 10:08:30 -0000 1.162 +++ net/if_spppsubr.c 22 Feb 2017 14:55:49 -0000 @@ -58,8 +58,7 @@ #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip.h> -#include <netinet/tcp.h> -#include <netinet/if_ether.h> +#include <netinet/ip_var.h> #ifdef INET6 #include <netinet6/in6_ifattach.h> @@ -503,8 +502,11 @@ sppp_input(struct ifnet *ifp, struct mbu return; case PPP_IP: if (sp->state[IDX_IPCP] == STATE_OPENED) { - inq = &ipintrq; sp->pp_last_activity = tv.tv_sec; + if (ifp->if_flags & IFF_UP) { + ipv4_input(ifp, m); + return; + } } break; #ifdef INET6 Index: net/if_mpe.c =================================================================== RCS file: /cvs/src/sys/net/if_mpe.c,v retrieving revision 1.58 diff -u -p -r1.58 if_mpe.c --- net/if_mpe.c 24 Jan 2017 10:08:30 -0000 1.58 +++ net/if_mpe.c 22 Feb 2017 14:55:54 -0000 @@ -33,6 +33,7 @@ #include <netinet/in.h> #include <netinet/ip.h> +#include <netinet/ip_var.h> #ifdef INET6 #include <netinet/ip6.h> @@ -396,7 +397,7 @@ mpe_input(struct mbuf *m, struct ifnet * bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_IN); #endif - niq_enqueue(&ipintrq, m); + ipv4_input(ifp, m); } #ifdef INET6 Index: net/if_pppx.c =================================================================== RCS file: /cvs/src/sys/net/if_pppx.c,v retrieving revision 1.57 diff -u -p -r1.57 if_pppx.c --- net/if_pppx.c 24 Jan 2017 10:08:30 -0000 1.57 +++ net/if_pppx.c 22 Feb 2017 15:11:41 -0000 @@ -317,7 +317,6 @@ pppxwrite(dev_t dev, struct uio *uio, in struct pppx_if *pxi; uint32_t 
proto; struct mbuf *top, **mp, *m; - struct niqueue *ifq; int tlen; int error = 0; size_t mlen; @@ -401,20 +400,18 @@ pppxwrite(dev_t dev, struct uio *uio, in switch (proto) { case AF_INET: - ifq = &ipintrq; + ipv4_input(&pxi->pxi_if, m); break; #ifdef INET6 case AF_INET6: - ifq = &ip6intrq; + if (niq_enqueue(&ip6intrq, top) != 0) + return (ENOBUFS); break; #endif default: m_freem(top); return (EAFNOSUPPORT); } - - if (niq_enqueue(ifq, top) != 0) - return (ENOBUFS); return (error); } Index: net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.173 diff -u -p -r1.173 if_tun.c --- net/if_tun.c 24 Jan 2017 10:08:30 -0000 1.173 +++ net/if_tun.c 22 Feb 2017 15:12:10 -0000 @@ -63,6 +63,7 @@ #include <net/rtable.h> #include <netinet/in.h> +#include <netinet/ip_var.h> #include <netinet/if_ether.h> #ifdef PIPEX @@ -878,7 +879,6 @@ int tun_dev_write(struct tun_softc *tp, struct uio *uio, int ioflag) { struct ifnet *ifp; - struct niqueue *ifq; u_int32_t *th; struct mbuf *top, **mp, *m; int error = 0, tlen; @@ -975,27 +975,25 @@ tun_dev_write(struct tun_softc *tp, stru top->m_pkthdr.ph_rtableid = ifp->if_rdomain; top->m_pkthdr.ph_ifidx = ifp->if_index; + ifp->if_ipackets++; + ifp->if_ibytes += top->m_pkthdr.len; + switch (ntohl(*th)) { case AF_INET: - ifq = &ipintrq; + ipv4_input(ifp, top); break; #ifdef INET6 case AF_INET6: - ifq = &ip6intrq; + if (niq_enqueue(&ip6intrq, top) != 0) { + ifp->if_collisions++; + return (ENOBUFS); + } break; #endif default: m_freem(top); return (EAFNOSUPPORT); } - - if (niq_enqueue(ifq, top) != 0) { - ifp->if_collisions++; - return (ENOBUFS); - } - - ifp->if_ipackets++; - ifp->if_ibytes += top->m_pkthdr.len; return (error); } Index: net/pipex.c =================================================================== RCS file: /cvs/src/sys/net/pipex.c,v retrieving revision 1.92 diff -u -p -r1.92 pipex.c --- net/pipex.c 24 Jan 2017 10:08:30 -0000 1.92 +++ net/pipex.c 
22 Feb 2017 14:57:49 -0000 @@ -1149,20 +1149,15 @@ pipex_ip_input(struct mbuf *m0, struct p bpf_mtap_af(ifp->if_bpf, AF_INET, m0, BPF_DIRECTION_IN); #endif - if (niq_enqueue(&ipintrq, m0) != 0) { - ifp->if_collisions++; - goto dropped; - } - ifp->if_ipackets++; ifp->if_ibytes += len; session->stat.ipackets++; session->stat.ibytes += len; + ipv4_input(ifp, m0); return; drop: m_freem(m0); -dropped: session->stat.ierrors++; } Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.487 diff -u -p -r1.487 if.c --- net/if.c 16 Feb 2017 10:15:12 -0000 1.487 +++ net/if.c 22 Feb 2017 15:13:18 -0000 @@ -96,6 +96,7 @@ #include <net/netisr.h> #include <netinet/in.h> +#include <netinet/ip_var.h> #include <netinet/if_ether.h> #include <netinet/igmp.h> #ifdef MROUTING @@ -734,8 +735,6 @@ if_input(struct ifnet *ifp, struct mbuf_ int if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af) { - struct niqueue *ifq = NULL; - #if NBPFILTER > 0 /* * Only send packets to bpf if they are destinated to local @@ -758,33 +757,29 @@ if_input_local(struct ifnet *ifp, struct ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + switch (af) { case AF_INET: - ifq = &ipintrq; - break; + ipv4_input(ifp, m); + return (0); #ifdef INET6 case AF_INET6: - ifq = &ip6intrq; + if (niq_enqueue(&ip6intrq, m) != 0) + return (ENOBUFS); break; #endif /* INET6 */ #ifdef MPLS case AF_MPLS: - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; mpls_input(m); - return (0); + break; #endif /* MPLS */ default: printf("%s: can't handle af%d\n", ifp->if_xname, af); m_freem(m); return (EAFNOSUPPORT); } - - if (niq_enqueue(ifq, m) != 0) - return (ENOBUFS); - - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; return (0); } Index: net/if_ppp.c =================================================================== RCS file: /cvs/src/sys/net/if_ppp.c,v 
retrieving revision 1.103 diff -u -p -r1.103 if_ppp.c --- net/if_ppp.c 1 Jan 2017 15:39:01 -0000 1.103 +++ net/if_ppp.c 22 Feb 2017 14:59:26 -0000 @@ -128,6 +128,7 @@ #include <netinet/in.h> #include <netinet/ip.h> +#include <netinet/ip_var.h> #include "bpfilter.h" @@ -1409,10 +1410,8 @@ ppp_inproc(struct ppp_softc *sc, struct m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; - if (niq_enqueue(&ipintrq, m) != 0) - rv = 0; /* failure */ - else - rv = 1; /* ipintrq success */ + ipv4_input(ifp, m); + rv = 1; break; default: Index: netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.44 diff -u -p -r1.44 ip_divert.c --- netinet/ip_divert.c 9 Feb 2017 15:32:56 -0000 1.44 +++ netinet/ip_divert.c 22 Feb 2017 15:01:51 -0000 @@ -134,6 +134,7 @@ divert_output(struct inpcb *inp, struct if (dir == PF_IN) { ipaddr.sin_addr = sin->sin_addr; + /* XXXSMP ifa_ifwithaddr() is not safe. */ ifa = ifa_ifwithaddr(sintosa(&ipaddr), m->m_pkthdr.ph_rtableid); if (ifa == NULL) { error = EADDRNOTAVAIL; @@ -150,7 +151,8 @@ divert_output(struct inpcb *inp, struct ip->ip_sum = in_cksum(m, off); in_proto_cksum_out(m, NULL); - niq_enqueue(&ipintrq, m); + /* XXXSMP ``ifa'' is not reference counted. 
*/ + ipv4_input(ifa->ifa_ifp, m); } else { error = ip_output(m, NULL, &inp->inp_route, IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0); Index: netinet/ip_gre.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_gre.c,v retrieving revision 1.62 diff -u -p -r1.62 ip_gre.c --- netinet/ip_gre.c 29 Jan 2017 19:58:47 -0000 1.62 +++ netinet/ip_gre.c 22 Feb 2017 15:24:40 -0000 @@ -93,7 +93,6 @@ int gre_input2(struct mbuf *m, int hlen, int proto) { struct greip *gip; - struct niqueue *ifq; struct gre_softc *sc; u_short flags; u_int af; @@ -160,13 +159,11 @@ gre_input2(struct mbuf *m, int hlen, int */ if (gre_wccp == 2) hlen += 4; - case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */ - ifq = &ipintrq; /* we are in ip_input */ + case ETHERTYPE_IP: af = AF_INET; break; #ifdef INET6 case ETHERTYPE_IPV6: - ifq = &ip6intrq; af = AF_INET6; break; #endif @@ -205,7 +202,19 @@ gre_input2(struct mbuf *m, int hlen, int pf_pkt_addr_changed(m); #endif - niq_enqueue(ifq, m); + switch (af) { + case AF_INET: + ipv4_input(&sc->sc_if, m); + break; +#ifdef INET6 + case AF_INET6: + niq_enqueue(&ip6intrq, m); + break; +#endif + default: + return (0); + } + return (1); /* packet is done, no further processing needed */ } @@ -334,7 +343,7 @@ gre_mobile_input(struct mbuf **mp, int * pf_pkt_addr_changed(m); #endif - niq_enqueue(&ipintrq, m); + ipv4_input(&sc->sc_if, m); return IPPROTO_DONE; } Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.295 diff -u -p -r1.295 ip_input.c --- netinet/ip_input.c 5 Feb 2017 16:23:38 -0000 1.295 +++ netinet/ip_input.c 22 Feb 2017 15:12:53 -0000 @@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, v static struct mbuf_queue ipsend_mq; void ip_ours(struct mbuf *); +void ip_local(struct mbuf *); int ip_dooptions(struct mbuf *, struct ifnet *); int in_ouraddr(struct mbuf *, struct ifnet *, struct 
rtentry **); #ifdef IPSEC @@ -225,19 +226,24 @@ ipintr(void) if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); #endif - ipv4_input(m); + ip_local(m); } } +void +ip_ours(struct mbuf *m) +{ + niq_enqueue(&ipintrq, m); +} + /* * IPv4 input routine. * * Checksum and byte swap header. Process options. Forward or deliver. */ void -ipv4_input(struct mbuf *m) +ipv4_input(struct ifnet *ifp, struct mbuf *m) { - struct ifnet *ifp; struct rtentry *rt = NULL; struct ip *ip; int hlen, len; @@ -246,10 +252,6 @@ ipv4_input(struct mbuf *m) #endif in_addr_t pfrdr = 0; - ifp = if_get(m->m_pkthdr.ph_ifidx); - if (ifp == NULL) - goto bad; - ipstat_inc(ips_total); if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { @@ -459,13 +461,11 @@ ipv4_input(struct mbuf *m) #endif /* IPSEC */ ip_forward(m, ifp, rt, pfrdr); - if_put(ifp); return; bad: m_freem(m); out: rtfree(rt); - if_put(ifp); } /* @@ -474,7 +474,7 @@ out: * If fragmented try to reassemble. Pass to next level. */ void -ip_ours(struct mbuf *m) +ip_local(struct mbuf *m) { struct ip *ip = mtod(m, struct ip *); struct ipq *fp; Index: netinet/ip_ipip.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_ipip.c,v retrieving revision 1.71 diff -u -p -r1.71 ip_ipip.c --- netinet/ip_ipip.c 29 Jan 2017 19:58:47 -0000 1.71 +++ netinet/ip_ipip.c 22 Feb 2017 15:08:36 -0000 @@ -118,7 +118,6 @@ ipip_input(struct mbuf **mp, int *offp, int iphlen = *offp; struct sockaddr_in *sin; struct ifnet *ifp; - struct niqueue *ifq = NULL; struct ip *ipo; #ifdef INET6 struct sockaddr_in6 *sin6; @@ -220,6 +219,7 @@ ipip_input(struct mbuf **mp, int *offp, /* Some sanity checks in the inner IP header */ switch (proto) { case IPPROTO_IPV4: + af = AF_INET; ipo = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; @@ -244,6 +244,7 @@ ipip_input(struct mbuf **mp, int *offp, break; #ifdef INET6 case IPPROTO_IPV6: + af = AF_INET6; ipo = NULL; ip6 = mtod(m, struct ip6_hdr *); itos = 
(ntohl(ip6->ip6_flow) >> 20) & 0xff; @@ -304,6 +305,14 @@ ipip_input(struct mbuf **mp, int *offp, /* Statistics */ ipipstat.ipips_ibytes += m->m_pkthdr.len - iphlen; +#if NBPFILTER > 0 + if (gifp && gifp->if_bpf) + bpf_mtap_af(gifp->if_bpf, af, m, BPF_DIRECTION_IN); +#endif +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + /* * Interface pointer stays the same; if no IPsec processing has * been done (or will be done), this will point to a normal @@ -314,32 +323,21 @@ ipip_input(struct mbuf **mp, int *offp, switch (proto) { case IPPROTO_IPV4: - ifq = &ipintrq; - af = AF_INET; + ipv4_input(ifp, m); break; #ifdef INET6 case IPPROTO_IPV6: - ifq = &ip6intrq; - af = AF_INET6; + if (niq_enqueue(&ip6intrq, m) != 0) { + ipipstat.ipips_qfull++; + DPRINTF(("ipip_input(): packet dropped because of full " + "queue\n")); + } break; #endif default: panic("ipip_input: should never reach here"); } -#if NBPFILTER > 0 - if (gifp && gifp->if_bpf) - bpf_mtap_af(gifp->if_bpf, af, m, BPF_DIRECTION_IN); -#endif -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - if (niq_enqueue(ifq, m) != 0) { - ipipstat.ipips_qfull++; - DPRINTF(("ipip_input(): packet dropped because of full " - "queue\n")); - } return IPPROTO_DONE; } Index: netinet/ip_var.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.68 diff -u -p -r1.68 ip_var.h --- netinet/ip_var.h 1 Feb 2017 20:59:47 -0000 1.68 +++ netinet/ip_var.h 22 Feb 2017 14:53:00 -0000 @@ -248,7 +248,7 @@ int ip_sysctl(int *, u_int, void *, siz void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ipintr(void); -void ipv4_input(struct mbuf *); +void ipv4_input(struct ifnet *, struct mbuf *); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); int rip_ctloutput(int, struct socket *, int, int, struct mbuf *); void rip_init(void); Index: netinet/ipsec_input.c =================================================================== RCS 
file: /cvs/src/sys/netinet/ipsec_input.c,v retrieving revision 1.144 diff -u -p -r1.144 ipsec_input.c --- netinet/ipsec_input.c 8 Feb 2017 12:37:43 -0000 1.144 +++ netinet/ipsec_input.c 22 Feb 2017 15:10:34 -0000 @@ -317,7 +317,7 @@ ipsec_common_input_cb(struct mbuf *m, st { int af, sproto; u_int8_t prot; - + struct ifnet *ifp; #if NBPFILTER > 0 struct ifnet *encif; #endif @@ -583,12 +583,13 @@ ipsec_common_input_cb(struct mbuf *m, st /* Call the appropriate IPsec transform callback. */ switch (af) { case AF_INET: - if (niq_enqueue(&ipintrq, m) != 0) { - DPRINTF(("ipsec_common_input_cb(): dropped packet " - "because of full IP queue\n")); - IPSEC_ISTAT(espstat.esps_qfull, ahstat.ahs_qfull, - ipcompstat.ipcomps_qfull); + ifp = if_get(m->m_pkthdr.ph_ifidx); + if (ifp == NULL) { + m_freem(m); + return; } + ipv4_input(ifp, m); + if_put(ifp); return; #ifdef INET6 case AF_INET6: Index: netmpls/mpls_input.c =================================================================== RCS file: /cvs/src/sys/netmpls/mpls_input.c,v retrieving revision 1.57 diff -u -p -r1.57 mpls_input.c --- netmpls/mpls_input.c 22 Aug 2016 15:37:23 -0000 1.57 +++ netmpls/mpls_input.c 22 Feb 2017 15:07:20 -0000 @@ -126,7 +126,13 @@ mpls_input(struct mbuf *m) do_v4: if (mpls_ip_adjttl(m, ttl)) return; - niq_enqueue(&ipintrq, m); + ifp = if_get(m->m_pkthdr.ph_ifidx); + if (ifp == NULL) { + m_freem(m); + return; + } + ipv4_input(ifp, m); + if_put(ifp); return; #ifdef INET6 case MPLS_LABEL_IPV6NULL: Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.487 diff -u -p -r1.487 if.c --- net/if.c 16 Feb 2017 10:15:12 -0000 1.487 +++ net/if.c 22 Feb 2017 16:57:59 -0000 @@ -881,7 +881,7 @@ if_input_process(void *xifidx) struct ifnet *ifp; struct ifih *ifih; struct srp_ref sr; - int s; + int s, s2; ifp = if_get(ifidx); if (ifp == NULL) @@ -894,6 +894,7 @@ if_input_process(void *xifidx) if (!ISSET(ifp->if_xflags, IFXF_CLONED)) 
add_net_randomness(ml_len(&ml)); + NET_LOCK(s2); s = splnet(); while ((m = ml_dequeue(&ml)) != NULL) { /* @@ -910,6 +911,7 @@ if_input_process(void *xifidx) m_freem(m); } splx(s); + NET_UNLOCK(s2); out: if_put(ifp);