On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> I'm currently working on the routing table interface to make is safe
> to use by multiple CPUs at the same time. The diff below is a big
> step in this direction and I'd really appreciate if people could test
> it with their usual network setup and report back.
Below is an updated version of the diff now that bluhm@ fixed the
potential loop with RTF_GATEWAY routes.
Note that regression tests will fail because we no longer call
rtalloc(9) inside rt_setgate(). In other words we do not cache
a next-hop route before using it.
Ok?
Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.268
diff -u -p -r1.268 route.c
--- net/route.c 4 Nov 2015 10:13:55 -0000 1.268
+++ net/route.c 4 Nov 2015 11:15:00 -0000
@@ -151,6 +151,7 @@ void rt_timer_init(void);
int rtflushclone1(struct rtentry *, void *, u_int);
void rtflushclone(unsigned int, struct rtentry *);
int rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
+struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *,
u_int);
@@ -194,6 +195,12 @@ rtisvalid(struct rtentry *rt)
if (rt == NULL)
return (0);
+#ifdef DIAGNOSTIC
+ if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) &&
+ ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY))
+ panic("next hop must be directly reachable");
+#endif
+
if ((rt->rt_flags & RTF_UP) == 0)
return (0);
@@ -201,11 +208,27 @@ rtisvalid(struct rtentry *rt)
if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
return (0);
+ if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute))
+ return (0);
+
return (1);
}
+/*
+ * Do the actual lookup for rtalloc(9), do not use directly!
+ *
+ * Return the best matching entry for the destination ``dst''.
+ *
+ * "RT_RESOLVE" means that a corresponding L2 entry should
+ * be added to the routing table and resolved (via ARP or
+ * NDP), if it does not exist.
+ *
+ * "RT_REPORT" indicates that a message should be sent to
+ * userland if no matching route has been found or if an
+ * error occured while adding a L2 entry.
+ */
struct rtentry *
-rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
+rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
{
struct rtentry *rt0, *rt = NULL;
struct rt_addrinfo info;
@@ -336,6 +359,76 @@ rtalloc_mpath(struct sockaddr *dst, uint
}
#endif /* SMALL_KERNEL */
+/*
+ * Look in the routing table for the best matching entry for
+ * ``dst''.
+ *
+ * If a route with a gateway is found and its next hop is no
+ * longer valid, try to cache it.
+ */
+struct rtentry *
+rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
+{
+ struct rtentry *rt, *nhrt;
+
+ rt = rt_match(dst, flags, rtableid);
+
+ /* No match or route to host? We're done. */
+ if (rt == NULL || !ISSET(rt->rt_flags, RTF_GATEWAY))
+ return (rt);
+
+ /* Nothing to do if the next hop is valid. */
+ if (rtisvalid(rt->rt_gwroute))
+ return (rt);
+
+ rtfree(rt->rt_gwroute);
+ rt->rt_gwroute = NULL;
+
+ /*
+ * If we cannot find a valid next hop, return the route
+ * with a gateway.
+ *
+ * XXX Some dragons hiding in the tree certainly depends on
+ * this behavior. But it is safe since rt_checkgate() wont
+ * allow us to us this route later on.
+ */
+ nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
+ if (nhrt == NULL)
+ return (rt);
+
+ /*
+ * Next hop must be reachable, this also prevents rtentry
+ * loops for example when rt->rt_gwroute points to rt.
+ */
+ if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
+ rtfree(nhrt);
+ return (rt);
+ }
+
+ /*
+ * Next hop entry must be UP and on the same interface.
+ */
+ if (!ISSET(nhrt->rt_flags, RTF_UP) || nhrt->rt_ifidx != rt->rt_ifidx) {
+ rtfree(nhrt);
+ return (rt);
+ }
+
+ /*
+ * If the MTU of next hop is 0, this will reset the MTU of the
+ * route to run PMTUD again from scratch.
+ */
+ if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
+ rt->rt_mtu = nhrt->rt_mtu;
+
+ /*
+ * Do not return the cached next-hop route, rt_checkgate() will
+ * do the magic for us.
+ */
+ rt->rt_gwroute = nhrt;
+
+ return (rt);
+}
+
void
rtref(struct rtentry *rt)
{
@@ -499,7 +592,7 @@ create:
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
stat = &rtstat.rts_newgateway;
- rt_setgate(rt, gateway, rdomain);
+ rt_setgate(rt, gateway);
}
} else
error = EHOSTUNREACH;
@@ -931,8 +1024,7 @@ rtrequest(int req, struct rt_addrinfo *i
* the routing table because the radix MPATH code use
* it to (re)order routes.
*/
- if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
- tableid))) {
+ if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
free(ndst, M_RTABLE, dlen);
pool_put(&rtentry_pool, rt);
return (error);
@@ -985,7 +1077,7 @@ rtrequest(int req, struct rt_addrinfo *i
}
int
-rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
+rt_setgate(struct rtentry *rt, struct sockaddr *gate)
{
int glen = ROUNDUP(gate->sa_len);
struct sockaddr *sa;
@@ -1003,22 +1095,7 @@ rt_setgate(struct rtentry *rt, struct so
rtfree(rt->rt_gwroute);
rt->rt_gwroute = NULL;
}
- if (rt->rt_flags & RTF_GATEWAY) {
- /* XXX is this actually valid to cross tables here? */
- rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
- /*
- * If we switched gateways, grab the MTU from the new
- * gateway route if the current MTU is 0 or greater
- * than the MTU of gateway.
- * Note that, if the MTU of gateway is 0, we will reset the
- * MTU of the route to run PMTUD again from scratch. XXX
- */
- if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
- rt->rt_rmx.rmx_mtu &&
- rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
- rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
- }
- }
+
return (0);
}
@@ -1033,26 +1110,8 @@ rt_checkgate(struct ifnet *ifp, struct r
rt0 = rt;
if (rt->rt_flags & RTF_GATEWAY) {
- if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
- rtfree(rt->rt_gwroute);
- rt->rt_gwroute = NULL;
- }
- if (rt->rt_gwroute == NULL) {
- rt->rt_gwroute = rtalloc(rt->rt_gateway,
- RT_REPORT|RT_RESOLVE, rtableid);
- if (rt->rt_gwroute == NULL)
- return (EHOSTUNREACH);
- }
- /*
- * Next hop must be reachable, this also prevents rtentry
- * loops, for example when rt->rt_gwroute points to rt.
- */
- if (((rt->rt_gwroute->rt_flags & (RTF_UP|RTF_GATEWAY)) !=
- RTF_UP) || (rt->rt_gwroute->rt_ifidx != ifp->if_index)) {
- rtfree(rt->rt_gwroute);
- rt->rt_gwroute = NULL;
+ if (rt->rt_gwroute == NULL)
return (EHOSTUNREACH);
- }
rt = rt->rt_gwroute;
}
Index: net/route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.118
diff -u -p -r1.118 route.h
--- net/route.h 30 Oct 2015 09:39:42 -0000 1.118
+++ net/route.h 4 Nov 2015 11:00:15 -0000
@@ -119,6 +119,8 @@ struct rtentry {
};
#define rt_use rt_rmx.rmx_pksent
#define rt_expire rt_rmx.rmx_expire
+#define rt_locks rt_rmx.rmx_locks
+#define rt_mtu rt_rmx.rmx_mtu
#define RTF_UP 0x1 /* route usable */
#define RTF_GATEWAY 0x2 /* destination is a gateway */
@@ -358,7 +360,7 @@ void rt_maskedcopy(struct sockaddr *,
void rt_sendmsg(struct rtentry *, int, u_int);
void rt_sendaddrmsg(struct rtentry *, int);
void rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
-int rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
+int rt_setgate(struct rtentry *, struct sockaddr *);
int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
unsigned int, struct rtentry **);
void rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.181
diff -u -p -r1.181 rtsock.c
--- net/rtsock.c 2 Nov 2015 14:40:09 -0000 1.181
+++ net/rtsock.c 4 Nov 2015 11:01:17 -0000
@@ -745,9 +745,8 @@ report:
goto flush;
ifa = info.rti_ifa;
}
- if (info.rti_info[RTAX_GATEWAY] != NULL &&
- (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
- tableid)))
+ if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
+ rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
goto flush;
if (ifa) {
if (rt->rt_ifa != ifa) {
Index: net/if_spppsubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_spppsubr.c,v
retrieving revision 1.144
diff -u -p -r1.144 if_spppsubr.c
--- net/if_spppsubr.c 2 Nov 2015 11:19:30 -0000 1.144
+++ net/if_spppsubr.c 4 Nov 2015 11:03:24 -0000
@@ -4244,10 +4244,9 @@ sppp_update_gw_walker(struct rtentry *rt
if (rt->rt_ifidx == ifp->if_index) {
if (rt->rt_ifa->ifa_dstaddr->sa_family !=
rt->rt_gateway->sa_family ||
- (rt->rt_flags & RTF_GATEWAY) == 0)
+ !ISSET(rt->rt_flags, RTF_GATEWAY))
return (0); /* do not modify non-gateway routes */
- bcopy(rt->rt_ifa->ifa_dstaddr, rt->rt_gateway,
- rt->rt_ifa->ifa_dstaddr->sa_len);
+ rt_setgate(rt, rt->rt_ifa->ifa_dstaddr);
}
return (0);
}