Author: glebius
Date: Wed May  8 23:39:24 2019
New Revision: 347375
URL: https://svnweb.freebsd.org/changeset/base/347375

Log:
  The existence of PCB route caching doesn't allow us to use the new fast
  route lookup KPI in ip_output() the way it is already used in ip_forward().
  However, when no PCB is provided we can use the fast KPI, gaining a
  performance advantage.
  
  A typical case where ip_output() is called without a PCB pointer is a
  sendto(2) on an unconnected UDP socket. In practice, DNS servers do
  this.
  
  Reviewed by:  melifaro
  Differential Revision:        https://reviews.freebsd.org/D19804

Modified:
  head/sys/net/route.h
  head/sys/net/route_var.h
  head/sys/netinet/in_fib.c
  head/sys/netinet/in_fib.h
  head/sys/netinet/ip_output.c

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h        Wed May  8 23:24:47 2019        (r347374)
+++ head/sys/net/route.h        Wed May  8 23:39:24 2019        (r347375)
@@ -210,6 +210,7 @@ struct rtentry {
 #define        NHF_DEFAULT             0x0080  /* Default route */
 #define        NHF_BROADCAST           0x0100  /* RTF_BROADCAST */
 #define        NHF_GATEWAY             0x0200  /* RTF_GATEWAY */
+#define        NHF_HOST                0x0400  /* RTF_HOST */
 
 /* Nexthop request flags */
 #define        NHR_IFAIF               0x01    /* Return ifa_ifp interface */

Modified: head/sys/net/route_var.h
==============================================================================
--- head/sys/net/route_var.h    Wed May  8 23:24:47 2019        (r347374)
+++ head/sys/net/route_var.h    Wed May  8 23:39:24 2019        (r347375)
@@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
        uint16_t res;
 
        res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+       res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
        res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
        res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
        res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;

Modified: head/sys/netinet/in_fib.c
==============================================================================
--- head/sys/netinet/in_fib.c   Wed May  8 23:24:47 2019        (r347374)
+++ head/sys/netinet/in_fib.c   Wed May  8 23:39:24 2019        (r347375)
@@ -96,7 +96,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
     uint32_t flags, struct nhop4_extended *pnh4)
 {
        struct sockaddr_in *gw;
-       struct in_ifaddr *ia;
 
        if ((flags & NHR_IFAIF) != 0)
                pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
@@ -113,10 +112,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
        gw = (struct sockaddr_in *)rt_key(rte);
        if (gw->sin_addr.s_addr == 0)
                pnh4->nh_flags |= NHF_DEFAULT;
-       /* XXX: Set RTF_BROADCAST if GW address is broadcast */
-
-       ia = ifatoia(rte->rt_ifa);
-       pnh4->nh_src = IA_SIN(ia)->sin_addr;
+       pnh4->nh_ia = ifatoia(rte->rt_ifa);
+       pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
 }
 
 /*

Modified: head/sys/netinet/in_fib.h
==============================================================================
--- head/sys/netinet/in_fib.h   Wed May  8 23:24:47 2019        (r347374)
+++ head/sys/netinet/in_fib.h   Wed May  8 23:39:24 2019        (r347375)
@@ -43,12 +43,13 @@ struct nhop4_basic {
 /* Extended nexthop info used for control protocols */
 struct nhop4_extended {
        struct ifnet    *nh_ifp;        /* Logical egress interface */
+       struct in_ifaddr *nh_ia;        /* Associated address */
        uint16_t        nh_mtu;         /* nexthop mtu */
        uint16_t        nh_flags;       /* nhop flags */
        uint8_t         spare[4];
        struct in_addr  nh_addr;        /* GW/DST IPv4 address */
        struct in_addr  nh_src;         /* default source IPv4 address */
-       uint64_t        spare2[2];
+       uint64_t        spare2;
 };
 
 int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c        Wed May  8 23:24:47 2019        (r347374)
+++ head/sys/netinet/ip_output.c        Wed May  8 23:39:24 2019        (r347375)
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -227,13 +228,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
        int hlen = sizeof (struct ip);
        int mtu;
        int error = 0;
-       struct sockaddr_in *dst;
+       struct sockaddr_in *dst, sin;
        const struct sockaddr_in *gw;
        struct in_ifaddr *ia;
+       struct in_addr src;
        int isbroadcast;
        uint16_t ip_len, ip_off;
-       struct route iproute;
-       struct rtentry *rte;    /* cache for ro->ro_rt */
        uint32_t fibnum;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
        int no_route_but_check_spd = 0;
@@ -252,11 +252,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
 #endif
        }
 
-       if (ro == NULL) {
-               ro = &iproute;
-               bzero(ro, sizeof (*ro));
-       }
-
        if (opt) {
                int len = 0;
                m = ip_insertoptions(m, opt, &len);
@@ -281,26 +276,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
        /*
         * dst/gw handling:
         *
-        * dst can be rewritten but always points to &ro->ro_dst.
         * gw is readonly but can point either to dst OR rt_gateway,
         * therefore we need restore gw if we're redoing lookup.
         */
-       gw = dst = (struct sockaddr_in *)&ro->ro_dst;
        fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
-       rte = ro->ro_rt;
-       if (rte == NULL) {
+       if (ro != NULL)
+               dst = (struct sockaddr_in *)&ro->ro_dst;
+       else
+               dst = &sin;
+       if (ro == NULL || ro->ro_rt == NULL) {
                bzero(dst, sizeof(*dst));
                dst->sin_family = AF_INET;
                dst->sin_len = sizeof(*dst);
                dst->sin_addr = ip->ip_dst;
        }
+       gw = dst;
        NET_EPOCH_ENTER(et);
 again:
        /*
         * Validate route against routing table additions;
         * a better/more specific route might have been added.
         */
-       if (inp)
+       if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
                RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
        /*
         * If there is a cached route,
@@ -310,15 +307,12 @@ again:
         * cache with IPv6.
         * Also check whether routing cache needs invalidation.
         */
-       rte = ro->ro_rt;
-       if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
-                   rte->rt_ifp == NULL ||
-                   !RT_LINK_IS_UP(rte->rt_ifp) ||
-                         dst->sin_family != AF_INET ||
-                         dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+       if (ro != NULL && ro->ro_rt != NULL &&
+           ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+           ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
+           dst->sin_family != AF_INET ||
+           dst->sin_addr.s_addr != ip->ip_dst.s_addr))
                RO_INVALIDATE_CACHE(ro);
-               rte = NULL;
-       }
        ia = NULL;
        /*
         * If routing to interface only, short circuit routing lookup.
@@ -338,8 +332,10 @@ again:
                ip->ip_dst.s_addr = INADDR_BROADCAST;
                dst->sin_addr = ip->ip_dst;
                ifp = ia->ia_ifp;
+               mtu = ifp->if_mtu;
                ip->ip_ttl = 1;
                isbroadcast = 1;
+               src = IA_SIN(ia)->sin_addr;
        } else if (flags & IP_ROUTETOIF) {
                if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
                                                    M_GETFIB(m)))) == NULL &&
@@ -350,9 +346,11 @@ again:
                        goto bad;
                }
                ifp = ia->ia_ifp;
+               mtu = ifp->if_mtu;
                ip->ip_ttl = 1;
                isbroadcast = ifp->if_flags & IFF_BROADCAST ?
                    in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
+               src = IA_SIN(ia)->sin_addr;
        } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
            imo != NULL && imo->imo_multicast_ifp != NULL) {
                /*
@@ -360,15 +358,17 @@ again:
                 * packets if the interface is specified.
                 */
                ifp = imo->imo_multicast_ifp;
+               mtu = ifp->if_mtu;
                IFP_TO_IA(ifp, ia, &in_ifa_tracker);
                isbroadcast = 0;        /* fool gcc */
-       } else {
-               /*
-                * We want to do any cloning requested by the link layer,
-                * as this is probably required in all cases for correct
-                * operation (as it is for ARP).
-                */
-               if (rte == NULL) {
+               src = IA_SIN(ia)->sin_addr;
+       } else if (ro != NULL) {
+               if (ro->ro_rt == NULL) {
+                       /*
+                        * We want to do any cloning requested by the link
+                        * layer, as this is probably required in all cases
+                        * for correct operation (as it is for ARP).
+                        */
 #ifdef RADIX_MPATH
                        rtalloc_mpath_fib(ro,
                            ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
@@ -376,12 +376,47 @@ again:
 #else
                        in_rtalloc_ign(ro, 0, fibnum);
 #endif
-                       rte = ro->ro_rt;
+                       if (ro->ro_rt == NULL ||
+                           (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+                           ro->ro_rt->rt_ifp == NULL ||
+                           !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+                               /*
+                                * There is no route for this packet, but it is
+                                * possible that a matching SPD entry exists.
+                                */
+                               no_route_but_check_spd = 1;
+                               mtu = 0; /* Silence GCC warning. */
+                               goto sendit;
+#endif
+                               IPSTAT_INC(ips_noroute);
+                               error = EHOSTUNREACH;
+                               goto bad;
+                       }
                }
-               if (rte == NULL ||
-                   (rte->rt_flags & RTF_UP) == 0 ||
-                   rte->rt_ifp == NULL ||
-                   !RT_LINK_IS_UP(rte->rt_ifp)) {
+               ia = ifatoia(ro->ro_rt->rt_ifa);
+               ifp = ro->ro_rt->rt_ifp;
+               counter_u64_add(ro->ro_rt->rt_pksent, 1);
+               rt_update_ro_flags(ro);
+               if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+                       gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+               if (ro->ro_rt->rt_flags & RTF_HOST)
+                       isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+               else if (ifp->if_flags & IFF_BROADCAST)
+                       isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+               else
+                       isbroadcast = 0;
+               if (ro->ro_rt->rt_flags & RTF_HOST)
+                       mtu = ro->ro_rt->rt_mtu;
+               else
+                       mtu = ifp->if_mtu;
+               src = IA_SIN(ia)->sin_addr;
+       } else {
+               struct nhop4_extended nh;
+
+               bzero(&nh, sizeof(nh));
+               if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
+                   0) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
                        /*
                         * There is no route for this packet, but it is
@@ -395,31 +430,29 @@ again:
                        error = EHOSTUNREACH;
                        goto bad;
                }
-               ia = ifatoia(rte->rt_ifa);
-               ifp = rte->rt_ifp;
-               counter_u64_add(rte->rt_pksent, 1);
-               rt_update_ro_flags(ro);
-               if (rte->rt_flags & RTF_GATEWAY)
-                       gw = (struct sockaddr_in *)rte->rt_gateway;
-               if (rte->rt_flags & RTF_HOST)
-                       isbroadcast = (rte->rt_flags & RTF_BROADCAST);
-               else if (ifp->if_flags & IFF_BROADCAST)
-                       isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
-               else
-                       isbroadcast = 0;
+               ifp = nh.nh_ifp;
+               mtu = nh.nh_mtu;
+               /*
+                * We are rewriting here dst to be gw actually, contradicting
+                * comment at the beginning of the function. However, in this
+                * case we are always dealing with on stack dst.
+                * In case if pfil(9) sends us back to beginning of the
+                * function, the dst would be rewritten by ip_output_pfil().
+                */
+               MPASS(dst == &sin);
+               dst->sin_addr = nh.nh_addr;
+               ia = nh.nh_ia;
+               src = nh.nh_src;
+               isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
+                   (NHF_HOST | NHF_BROADCAST)) ||
+                   ((ifp->if_flags & IFF_BROADCAST) &&
+                   in_ifaddr_broadcast(dst->sin_addr, ia)));
        }
 
-       /*
-        * Calculate MTU.  If we have a route that is up, use that,
-        * otherwise use the interface's MTU.
-        */
-       if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
-               mtu = rte->rt_mtu;
-       else
-               mtu = ifp->if_mtu;
        /* Catch a possible divide by zero later. */
-       KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
-           __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+       KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
+           __func__, mtu, ro,
+           (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));
 
        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
                m->m_flags |= M_MCAST;
@@ -455,11 +488,8 @@ again:
                 * If source address not specified yet, use address
                 * of outgoing interface.
                 */
-               if (ip->ip_src.s_addr == INADDR_ANY) {
-                       /* Interface may have no addresses. */
-                       if (ia != NULL)
-                               ip->ip_src = IA_SIN(ia)->sin_addr;
-               }
+               if (ip->ip_src.s_addr == INADDR_ANY)
+                       ip->ip_src = src;
 
                if ((imo == NULL && in_mcast_loop) ||
                    (imo && imo->imo_multicast_loop)) {
@@ -522,12 +552,8 @@ again:
         * If the source address is not specified yet, use the address
         * of the outoing interface.
         */
-       if (ip->ip_src.s_addr == INADDR_ANY) {
-               /* Interface may have no addresses. */
-               if (ia != NULL) {
-                       ip->ip_src = IA_SIN(ia)->sin_addr;
-               }
-       }
+       if (ip->ip_src.s_addr == INADDR_ANY)
+               ip->ip_src = src;
 
        /*
         * Look for broadcast address and
@@ -587,9 +613,10 @@ sendit:
 
                case -1: /* Need to try again */
                        /* Reset everything for a new round */
-                       RO_RTFREE(ro);
-                       ro->ro_prepend = NULL;
-                       rte = NULL;
+                       if (ro != NULL) {
+                               RO_RTFREE(ro);
+                               ro->ro_prepend = NULL;
+                       }
                        gw = dst;
                        ip = mtod(m, struct ip *);
                        goto again;
@@ -733,15 +760,6 @@ sendit:
                IPSTAT_INC(ips_fragmented);
 
 done:
-       if (ro == &iproute)
-               RO_RTFREE(ro);
-       else if (rte == NULL)
-               /*
-                * If the caller supplied a route but somehow the reference
-                * to it has been released need to prevent the caller
-                * calling RTFREE on it again.
-                */
-               ro->ro_rt = NULL;
        NET_EPOCH_EXIT(et);
        return (error);
  bad:
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to