Author: ae Date: Tue Jun 5 21:24:59 2018 New Revision: 334673 URL: https://svnweb.freebsd.org/changeset/base/334673
Log: Rework if_gif(4) to use the new encap_lookup_t method to speed up lookup of the needed interface when many gif interfaces are present. Remove rmlock from gif_softc, use epoch(9) and CK_LIST instead. Move more AF-related code into AF-related locations. Use a hash table to speed up lookup of the needed softc. Interfaces with the GIF_IGNORE_SOURCE flag are stored in a plain CK_LIST. Sysctl net.link.gif.parallel_tunnels is removed. The removal was planned 16 years ago, and it actually could work only for the outbound direction. Each protocol that can be handled by an if_gif(4) interface is registered with a separate encap handler; this helps avoid invoking the handler for unrelated protocols (GRE, PIM, etc.). This change dramatically improves performance when many gif(4) interfaces are used. Sponsored by: Yandex LLC Modified: head/share/man/man4/gif.4 head/sys/net/if_gif.c head/sys/net/if_gif.h head/sys/netinet/in_gif.c head/sys/netinet6/in6_gif.c Modified: head/share/man/man4/gif.4 ============================================================================== --- head/share/man/man4/gif.4 Tue Jun 5 20:54:29 2018 (r334672) +++ head/share/man/man4/gif.4 Tue Jun 5 21:24:59 2018 (r334673) @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 10, 2015 +.Dd June 5, 2018 .Dt GIF 4 .Os .Sh NAME @@ -169,14 +169,6 @@ This behavior may be modified at runtime by setting th variable .Va net.link.gif.max_nesting to the desired level of nesting. -Additionally, -.Nm -tunnels are restricted to one per pair of end points. -Parallel tunnels may be enabled by setting the -.Xr sysctl 8 -variable -.Va net.link.gif.parallel_tunnels -to 1. 
.Sh SEE ALSO .Xr gre 4 , .Xr inet 4 , Modified: head/sys/net/if_gif.c ============================================================================== --- head/sys/net/if_gif.c Tue Jun 5 20:54:29 2018 (r334672) +++ head/sys/net/if_gif.c Tue Jun 5 21:24:59 2018 (r334673) @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,7 +40,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/jail.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/priv.h> #include <sys/proc.h> -#include <sys/protosw.h> #include <sys/conf.h> #include <machine/cpu.h> @@ -85,8 +84,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet6/ip6_ecn.h> #include <netinet6/ip6_var.h> -#include <netinet6/scope6_var.h> -#include <netinet6/ip6protosw.h> #endif /* INET6 */ #include <netinet/ip_encap.h> @@ -98,32 +95,17 @@ __FBSDID("$FreeBSD$"); static const char gifname[] = "gif"; -/* - * gif_mtx protects a per-vnet gif_softc_list. 
- */ -static VNET_DEFINE(struct mtx, gif_mtx); -#define V_gif_mtx VNET(gif_mtx) -static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); -static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); -#define V_gif_softc_list VNET(gif_softc_list) +MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); -#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ - NULL, MTX_DEF) -#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) -#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) -#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) - void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); static int gif_check_nesting(struct ifnet *, struct mbuf *); -static int gif_set_tunnel(struct ifnet *, struct sockaddr *, - struct sockaddr *); -static void gif_delete_tunnel(struct ifnet *); +static void gif_delete_tunnel(struct gif_softc *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); @@ -132,8 +114,6 @@ static void gif_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) -static int gifmodevent(module_t, int, void *); - SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); @@ -153,21 +133,6 @@ static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NES SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); -/* - * By default, we disallow creation of multiple tunnels between the same - * pair of addresses. Some applications require this functionality so - * we allow control over this check here. 
- */ -#ifdef XBONEHACK -static VNET_DEFINE(int, parallel_tunnels) = 1; -#else -static VNET_DEFINE(int, parallel_tunnels) = 0; -#endif -#define V_parallel_tunnels VNET(parallel_tunnels) -SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, - "Allow parallel tunnels?"); - static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { @@ -176,20 +141,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); - GIF_LOCK_INIT(sc); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; -#if 0 - /* turn off ingress filter */ - GIF2IFP(sc)->if_flags |= IFF_LINK2; -#endif GIF2IFP(sc)->if_ioctl = gif_ioctl; - GIF2IFP(sc)->if_transmit = gif_transmit; - GIF2IFP(sc)->if_qflush = gif_qflush; + GIF2IFP(sc)->if_transmit = gif_transmit; + GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; @@ -198,9 +158,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); - GIF_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); - GIF_LIST_UNLOCK(); return (0); } @@ -211,10 +168,7 @@ gif_clone_destroy(struct ifnet *ifp) sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; - gif_delete_tunnel(ifp); - GIF_LIST_LOCK(); - LIST_REMOVE(sc, gif_list); - GIF_LIST_UNLOCK(); + gif_delete_tunnel(sc); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); @@ -222,8 +176,8 @@ gif_clone_destroy(struct ifnet *ifp) ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); + GIF_WAIT(); if_free(ifp); - GIF_LOCK_DESTROY(sc); free(sc, M_GIF); } @@ -231,10 +185,14 @@ static 
void vnet_gif_init(const void *unused __unused) { - LIST_INIT(&V_gif_softc_list); - GIF_LIST_LOCK_INIT(); V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); +#ifdef INET + in_gif_init(); +#endif +#ifdef INET6 + in6_gif_init(); +#endif } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); @@ -244,7 +202,12 @@ vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); - GIF_LIST_LOCK_DESTROY(); +#ifdef INET + in_gif_uninit(); +#endif +#ifdef INET6 + in6_gif_uninit(); +#endif } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); @@ -272,65 +235,25 @@ static moduledata_t gif_mod = { DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); -int -gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +struct gif_list * +gif_hashinit(void) { - GIF_RLOCK_TRACKER; - const struct ip *ip; - struct gif_softc *sc; - int ret; + struct gif_list *hash; + int i; - sc = (struct gif_softc *)arg; - if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) - return (0); + hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE, + M_GIF, M_WAITOK); + for (i = 0; i < GIF_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); - ret = 0; - GIF_RLOCK(sc); + return (hash); +} - /* no physical address */ - if (sc->gif_family == 0) - goto done; +void +gif_hashdestroy(struct gif_list *hash) +{ - switch (proto) { -#ifdef INET - case IPPROTO_IPV4: -#endif -#ifdef INET6 - case IPPROTO_IPV6: -#endif - case IPPROTO_ETHERIP: - break; - default: - goto done; - } - - /* Bail on short packets */ - M_ASSERTPKTHDR(m); - if (m->m_pkthdr.len < sizeof(struct ip)) - goto done; - - ip = mtod(m, const struct ip *); - switch (ip->ip_v) { -#ifdef INET - case 4: - if (sc->gif_family != AF_INET) - goto done; - ret = in_gif_encapcheck(m, off, proto, arg); - break; -#endif -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) - goto 
done; - if (sc->gif_family != AF_INET6) - goto done; - ret = in6_gif_encapcheck(m, off, proto, arg); - break; -#endif - } -done: - GIF_RUNLOCK(sc); - return (ret); + free(hash, M_GIF); } static int @@ -357,6 +280,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) } #endif error = ENETDOWN; + GIF_RLOCK(); sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || @@ -444,6 +368,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + GIF_RUNLOCK(); return (error); } @@ -616,7 +541,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto break; #endif case AF_LINK: - n = sizeof(struct etherip_header) + sizeof(struct ether_header); + n = sizeof(struct etherip_header) + + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) @@ -674,20 +600,11 @@ drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } -/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ -int +static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - GIF_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; - struct sockaddr *dst, *src; struct gif_softc *sc; -#ifdef INET - struct sockaddr_in *sin = NULL; -#endif -#ifdef INET6 - struct sockaddr_in6 *sin6 = NULL; -#endif u_int options; int error; @@ -715,176 +632,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } error = 0; switch (cmd) { - case SIOCSIFPHYADDR: -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: -#endif - error = EINVAL; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - src = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_dstaddr); + case SIOCDIFPHYADDR: + if (sc->gif_family == 0) break; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - src = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_dstaddr); - break; -#endif - 
default: - goto bad; - } - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family || - src->sa_len != dst->sa_len) - goto bad; - - /* validate sa_len */ - /* check sa_family looks sane for the cmd */ - switch (src->sa_family) { -#ifdef INET - case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - goto bad; - if (cmd != SIOCSIFPHYADDR) { - error = EAFNOSUPPORT; - goto bad; - } - if (satosin(src)->sin_addr.s_addr == INADDR_ANY || - satosin(dst)->sin_addr.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - goto bad; - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - goto bad; - if (cmd != SIOCSIFPHYADDR_IN6) { - error = EAFNOSUPPORT; - goto bad; - } - error = EADDRNOTAVAIL; - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) - || - IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) - goto bad; - /* - * Check validity of the scope zone ID of the - * addresses, and convert it into the kernel - * internal form if necessary. 
- */ - error = sa6_embedscope(satosin6(src), 0); - if (error != 0) - goto bad; - error = sa6_embedscope(satosin6(dst), 0); - if (error != 0) - goto bad; - break; -#endif - default: - error = EAFNOSUPPORT; - goto bad; - } - error = gif_set_tunnel(ifp, src, dst); + gif_delete_tunnel(sc); break; - case SIOCDIFPHYADDR: - gif_delete_tunnel(ifp); - break; +#ifdef INET + case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: + error = in_gif_ioctl(sc, cmd, data); + break; +#endif #ifdef INET6 + case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: -#endif - if (sc->gif_family == 0) { - error = EADDRNOTAVAIL; - break; - } - GIF_RLOCK(sc); - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - if (sc->gif_family != AF_INET) { - error = EADDRNOTAVAIL; - break; - } - sin = (struct sockaddr_in *)&ifr->ifr_addr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - if (sc->gif_family != AF_INET6) { - error = EADDRNOTAVAIL; - break; - } - sin6 = (struct sockaddr_in6 *) - &(((struct in6_ifreq *)data)->ifr_addr); - memset(sin6, 0, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - break; -#endif - default: - error = EAFNOSUPPORT; - } - if (error == 0) { - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - sin->sin_addr = sc->gif_iphdr->ip_src; - break; - case SIOCGIFPDSTADDR: - sin->sin_addr = sc->gif_iphdr->ip_dst; - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; - break; - case SIOCGIFPDSTADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; - break; -#endif - } - } - GIF_RUNLOCK(sc); - if (error != 0) - break; - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin); - if (error != 0) - memset(sin, 0, sizeof(*sin)); - 
break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin6); - if (error == 0) - error = sa6_recoverscope(sin6); - if (error != 0) - memset(sin6, 0, sizeof(*sin6)); -#endif - } + error = in6_gif_ioctl(sc, cmd, data); break; +#endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; @@ -908,171 +674,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sizeof(options)); if (error) break; - if (options & ~GIF_OPTMASK) + if (options & ~GIF_OPTMASK) { error = EINVAL; - else - sc->gif_options = options; - break; - default: - error = EINVAL; - break; - } -bad: - sx_xunlock(&gif_ioctl_sx); - return (error); -} - -static void -gif_detach(struct gif_softc *sc, int family) -{ - - sx_assert(&gif_ioctl_sx, SA_XLOCKED); - if (sc->gif_ecookie != NULL) { - switch (family) { -#ifdef INET - case AF_INET: - ip_encap_detach(sc->gif_ecookie); break; -#endif -#ifdef INET6 - case AF_INET6: - ip6_encap_detach(sc->gif_ecookie); - break; -#endif } - } - sc->gif_ecookie = NULL; -} - -static int -gif_attach(struct gif_softc *sc, int af) -{ - - sx_assert(&gif_ioctl_sx, SA_XLOCKED); - switch (af) { + if (sc->gif_options != options) { + switch (sc->gif_family) { #ifdef INET - case AF_INET: - return (in_gif_attach(sc)); + case AF_INET: + error = in_gif_setopts(sc, options); + break; #endif #ifdef INET6 - case AF_INET6: - return (in6_gif_attach(sc)); + case AF_INET6: + error = in6_gif_setopts(sc, options); + break; #endif - } - return (EAFNOSUPPORT); -} - -static int -gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) -{ - struct gif_softc *sc = ifp->if_softc; - struct gif_softc *tsc; -#ifdef INET - struct ip *ip; -#endif -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - void *hdr; - int error = 0; - - if (sc == NULL) - return (ENXIO); - /* Disallow parallel tunnels unless instructed otherwise. 
*/ - if (V_parallel_tunnels == 0) { - GIF_LIST_LOCK(); - LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { - if (tsc == sc || tsc->gif_family != src->sa_family) - continue; -#ifdef INET - if (tsc->gif_family == AF_INET && - tsc->gif_iphdr->ip_src.s_addr == - satosin(src)->sin_addr.s_addr && - tsc->gif_iphdr->ip_dst.s_addr == - satosin(dst)->sin_addr.s_addr) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; + default: + /* No need to invoke AF-handler */ + sc->gif_options = options; } -#endif -#ifdef INET6 - if (tsc->gif_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, - &satosin6(src)->sin6_addr) && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, - &satosin6(dst)->sin6_addr)) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; - } -#endif } - GIF_LIST_UNLOCK(); + break; + default: + error = EINVAL; + break; } - switch (src->sa_family) { + if (error == 0 && sc->gif_family != 0) { + if ( #ifdef INET - case AF_INET: - hdr = ip = malloc(sizeof(struct ip), M_GIF, - M_WAITOK | M_ZERO); - ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; - ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; - break; + cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 - case AF_INET6: - hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, - M_WAITOK | M_ZERO); - ip6->ip6_src = satosin6(src)->sin6_addr; - ip6->ip6_dst = satosin6(dst)->sin6_addr; - ip6->ip6_vfc = IPV6_VERSION; - break; + cmd == SIOCSIFPHYADDR_IN6 || #endif - default: - return (EAFNOSUPPORT); + 0) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } } - - if (sc->gif_family != src->sa_family) - gif_detach(sc, sc->gif_family); - if (sc->gif_family == 0 || - sc->gif_family != src->sa_family) - error = gif_attach(sc, src->sa_family); - - GIF_WLOCK(sc); - if (sc->gif_family != 0) - free(sc->gif_hdr, M_GIF); - sc->gif_family = src->sa_family; - sc->gif_hdr = hdr; - GIF_WUNLOCK(sc); -#if defined(INET) || defined(INET6) bad: -#endif - if (error == 0 && sc->gif_family 
!= 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_UP); - } else { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); - } + sx_xunlock(&gif_ioctl_sx); return (error); } static void -gif_delete_tunnel(struct ifnet *ifp) +gif_delete_tunnel(struct gif_softc *sc) { - struct gif_softc *sc = ifp->if_softc; - int family; - if (sc == NULL) - return; - - GIF_WLOCK(sc); - family = sc->gif_family; - sc->gif_family = 0; - GIF_WUNLOCK(sc); - if (family != 0) { - gif_detach(sc, family); + sx_assert(&gif_ioctl_sx, SA_XLOCKED); + if (sc->gif_family != 0) { + CK_LIST_REMOVE(sc, chain); + /* Wait until it become safe to free gif_hdr */ + GIF_WAIT(); free(sc->gif_hdr, M_GIF); } - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); + sc->gif_family = 0; + GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN); } + Modified: head/sys/net/if_gif.h ============================================================================== --- head/sys/net/if_gif.h Tue Jun 5 20:54:29 2018 (r334672) +++ head/sys/net/if_gif.h Tue Jun 5 21:24:59 2018 (r334673) @@ -5,6 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -36,14 +37,9 @@ #define _NET_IF_GIF_H_ #ifdef _KERNEL -#include "opt_inet.h" -#include "opt_inet6.h" -#include <netinet/in.h> - struct ip; struct ip6_hdr; -struct encaptab; extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); @@ -55,8 +51,6 @@ extern void (*ng_gif_detach_p)(struct ifnet *ifp); struct gif_softc { struct ifnet *gif_ifp; - struct rmlock gif_lock; - const struct encaptab *gif_ecookie; int gif_family; int gif_flags; u_int gif_fibnum; @@ -65,28 +59,22 @@ struct gif_softc { union { void *hdr; struct ip *iphdr; -#ifdef INET6 struct ip6_hdr *ip6hdr; -#endif } gif_uhdr; - LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */ + + CK_LIST_ENTRY(gif_softc) chain; }; -#define GIF2IFP(sc) ((sc)->gif_ifp) -#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc") -#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock) -#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker -#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED) -#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock) -#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock) -#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED) +CK_LIST_HEAD(gif_list, gif_softc); +MALLOC_DECLARE(M_GIF); +#ifndef GIF_HASH_SIZE +#define GIF_HASH_SIZE (1 << 4) +#endif + +#define GIF2IFP(sc) ((sc)->gif_ifp) #define gif_iphdr gif_uhdr.iphdr #define gif_hdr gif_uhdr.hdr -#ifdef INET6 #define gif_ip6hdr gif_uhdr.ip6hdr -#endif #define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ @@ -108,21 +96,29 @@ struct etherip_header { /* mbuf adjust factor to force 32-bit alignment of IP header */ #define ETHERIP_ALIGN 2 +#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt) +#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) 
+#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt) + /* Prototypes */ +struct gif_list *gif_hashinit(void); +void gif_hashdestroy(struct gif_list *); + void gif_input(struct mbuf *, struct ifnet *, int, uint8_t); int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -int gif_encapcheck(const struct mbuf *, int, int, void *); -#ifdef INET + +void in_gif_init(void); +void in_gif_uninit(void); int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in_gif_encapcheck(const struct mbuf *, int, int, void *); -int in_gif_attach(struct gif_softc *); -#endif -#ifdef INET6 +int in_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in_gif_setopts(struct gif_softc *, u_int); + +void in6_gif_init(void); +void in6_gif_uninit(void); int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in6_gif_encapcheck(const struct mbuf *, int, int, void *); -int in6_gif_attach(struct gif_softc *); -#endif +int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in6_gif_setopts(struct gif_softc *, u_int); #endif /* _KERNEL */ #define GIFGOPTS _IOWR('i', 150, struct ifreq) Modified: head/sys/netinet/in_gif.c ============================================================================== --- head/sys/netinet/in_gif.c Tue Jun 5 20:54:29 2018 (r334672) +++ head/sys/netinet/in_gif.c Tue Jun 5 21:24:59 2018 (r334673) @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <a...@freebsd.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,9 +39,8 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include <sys/param.h> -#include <sys/lock.h> -#include <sys/rmlock.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/mbuf.h> @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/malloc.h> +#include <net/ethernet.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> @@ -75,15 +76,155 @@ static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL; SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets"); +/* + * We keep interfaces in a hash table using src+dst as key. + * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. + */ +static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL; +static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER(); +#define V_ipv4_hashtbl VNET(ipv4_hashtbl) +#define V_ipv4_list VNET(ipv4_list) + +#define GIF_HASH(src, dst) (V_ipv4_hashtbl[\ + in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\ + (sc)->gif_iphdr->ip_dst.s_addr) +static uint32_t +in_gif_hashval(in_addr_t src, in_addr_t dst) +{ + uint32_t ret; + + ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); + return (fnv_32_buf(&dst, sizeof(dst), ret)); +} + +static int +in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst) +{ + struct gif_softc *tmp; + + if (sc->gif_family == AF_INET && + sc->gif_iphdr->ip_src.s_addr == src && + sc->gif_iphdr->ip_dst.s_addr == dst) + return (EEXIST); + + CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { + if (tmp == sc) + continue; + if (tmp->gif_iphdr->ip_src.s_addr == src && + tmp->gif_iphdr->ip_dst.s_addr == dst) + return (EADDRNOTAVAIL); + } + return (0); +} + +static void +in_gif_attach(struct gif_softc *sc) +{ + 
+ if (sc->gif_options & GIF_IGNORE_SOURCE) + CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain); + else + CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); +} + int +in_gif_setopts(struct gif_softc *sc, u_int options) +{ + + /* NOTE: we are protected with gif_ioctl_sx lock */ + MPASS(sc->gif_family == AF_INET); + MPASS(sc->gif_options != options); + + if ((options & GIF_IGNORE_SOURCE) != + (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, chain); + sc->gif_options = options; + in_gif_attach(sc); + } + return (0); +} + +int +in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) +{ + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr_in *dst, *src; + struct ip *ip; + int error; + + /* NOTE: we are protected with gif_ioctl_sx lock */ + error = EINVAL; + switch (cmd) { + case SIOCSIFPHYADDR: + src = &((struct in_aliasreq *)data)->ifra_addr; + dst = &((struct in_aliasreq *)data)->ifra_dstaddr; + + /* sanity checks */ + if (src->sin_family != dst->sin_family || + src->sin_family != AF_INET || + src->sin_len != dst->sin_len || + src->sin_len != sizeof(*src)) + break; + if (src->sin_addr.s_addr == INADDR_ANY || + dst->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; + break; + } + if (V_ipv4_hashtbl == NULL) + V_ipv4_hashtbl = gif_hashinit(); + error = in_gif_checkdup(sc, src->sin_addr.s_addr, + dst->sin_addr.s_addr); + if (error == EADDRNOTAVAIL) + break; + if (error == EEXIST) { + /* Addresses are the same. Just return. */ + error = 0; + break; + } + ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO); + ip->ip_src.s_addr = src->sin_addr.s_addr; + ip->ip_dst.s_addr = dst->sin_addr.s_addr; + if (sc->gif_family != 0) { + /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, chain); + GIF_WAIT(); + free(sc->gif_hdr, M_GIF); + /* XXX: should we notify about link state change? 
*/ + } + sc->gif_family = AF_INET; + sc->gif_iphdr = ip; + in_gif_attach(sc); + break; *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"