Author: melifaro Date: Sat May 23 10:21:02 2020 New Revision: 361409 URL: https://svnweb.freebsd.org/changeset/base/361409
Log: Use epoch(9) for rtentries to simplify control plane operations. Currently the only reason of refcounting rtentries is the need to report the rtable operation details immediately after the execution. Delaying rtentry reclamation allows to stop refcounting and simplify the code. Additionally, this change allows to reimplement rib_lookup_info(), which is used by some of the customers to get the matching prefix along with nexthops, in more efficient way. The change keeps per-vnet rtzone uma zone. It adds nh_vnet field to nhop_priv to be able to reliably set curvnet even during vnet teardown. Rest of the reference counting code will be removed in the D24867 . Differential Revision: https://reviews.freebsd.org/D24866 Modified: head/sys/fs/nfsclient/nfs_clvfsops.c head/sys/net/if.c head/sys/net/route.c head/sys/net/route.h head/sys/net/route/nhop.h head/sys/net/route/nhop_ctl.c head/sys/net/route/nhop_var.h head/sys/net/route/route_var.h head/sys/net/rtsock.c head/sys/netinet6/nd6.c head/sys/netinet6/nd6_rtr.c head/sys/nfs/bootp_subr.c Modified: head/sys/fs/nfsclient/nfs_clvfsops.c ============================================================================== --- head/sys/fs/nfsclient/nfs_clvfsops.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/fs/nfsclient/nfs_clvfsops.c Sat May 23 10:21:02 2020 (r361409) @@ -465,18 +465,21 @@ nfs_mountroot(struct mount *mp) if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; + struct epoch_tracker et; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ + NET_EPOCH_ENTER(et); CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } Modified: head/sys/net/if.c ============================================================================== --- head/sys/net/if.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/if.c Sat May 23 10:21:02 2020 (r361409) @@ -1854,18 +1854,17 @@ ifa_maintain_loopback_route(int cmd, const char *otype ifp = ifa->ifa_ifp; + NET_EPOCH_ENTER(et); bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { - NET_EPOCH_ENTER(et); rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (rti_ifa != NULL) ifa_ref(rti_ifa); info.rti_ifa = rti_ifa; - NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; @@ -1874,6 +1873,7 @@ ifa_maintain_loopback_route(int cmd, const char *otype link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); + NET_EPOCH_EXIT(et); if (rti_ifa != NULL) ifa_free(rti_ifa); Modified: head/sys/net/route.c ============================================================================== --- head/sys/net/route.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route.c Sat May 23 10:21:02 2020 (r361409) @@ -120,10 +120,7 @@ VNET_PCPUSTAT_SYSUNINIT(rtstat); VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) -VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ -#define V_rttrash VNET(rttrash) - /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a @@ -148,6 +145,7 @@ static int rt_ifdelroute(const struct rtentry *rt, con static struct rtentry *rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror); static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); +static void destroy_rtentry_epoch(epoch_context_t ctx); #ifdef RADIX_MPATH static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror); @@ -332,6 +330,16 @@ vnet_route_uninit(const void *unused __unused) } } + /* + * dom_rtdetach calls rt_table_destroy(), which + * schedules deletion for all rtentries, nexthops and control + * structures. Wait for the destruction callbacks to fire. + * Note that this should result in freeing all rtentries, but + * nexthops deletions will be scheduled for the next epoch run + * and will be completed after vnet teardown. + */ + epoch_drain_callbacks(net_epoch_preempt); + free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } @@ -449,41 +457,54 @@ rtfree(struct rtentry *rt) if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); - /* - * the rtentry must have been removed from the routing table - * so it is represented in rttrash.. remove that now. - */ - V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif + epoch_call(net_epoch_preempt, destroy_rtentry_epoch, + &rt->rt_epoch_ctx); - /* Unreference nexthop */ - nhop_free(rt->rt_nhop); - /* - * and the rtentry itself of course + * FALLTHROUGH to RT_UNLOCK() so the reporting functions + * have consistent behaviour of operating on unlocked entry. */ - uma_zfree(V_rtzone, rt); - return; } done: RT_UNLOCK(rt); } +static void +destroy_rtentry(struct rtentry *rt) +{ + + /* + * At this moment rnh, nh_control may be already freed. + * nhop interface may have been migrated to a different vnet. + * Use vnet stored in the nexthop to delete the entry. + */ + CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); + + /* Unreference nexthop */ + nhop_free(rt->rt_nhop); + + uma_zfree(V_rtzone, rt); + + CURVNET_RESTORE(); +} + /* - * Temporary RTFREE() function wrapper. - * Intended to use in control plane code to - * avoid exposing internal layout of 'struct rtentry'. + * Epoch callback indicating rtentry is safe to destroy */ -void -rtfree_func(struct rtentry *rt) +static void +destroy_rtentry_epoch(epoch_context_t ctx) { + struct rtentry *rt; - RTFREE(rt); + rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); + + destroy_rtentry(rt); } /* @@ -546,7 +567,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, s RT_LOCK(rt); flags = rt->rt_flags; - RTFREE_LOCKED(rt); + RT_UNLOCK(rt); RTSTAT_INC(rts_dynamic); @@ -1112,13 +1133,6 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo ifa = rt->rt_nhop->nh_ifa; if (ifa != NULL && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); - - /* - * One more rtentry floating around that is not - * linked to the routing table. rttrash will be decremented - * when RTFREE(rt) is eventually called. - */ - V_rttrash++; } @@ -1386,6 +1400,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, stru KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); + NET_EPOCH_ASSERT(); dst = info->rti_info[RTAX_DST]; @@ -1580,13 +1595,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *in ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); /* - * actually return a resultant rtentry and - * give the caller a single reference. + * actually return a resultant rtentry */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_ADDREF(rt); - } rnh->rnh_gen++; /* Routing table updated */ RT_UNLOCK(rt); @@ -1622,15 +1634,13 @@ del_route(struct rib_head *rnh, struct rt_addrinfo *in /* * If the caller wants it, then it can have it, - * but it's up to it to free the rtentry as we won't be - * doing it. + * the entry will be deleted after the end of the current epoch. */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_UNLOCK(rt); - } else - RTFREE_LOCKED(rt); - + + RTFREE_LOCKED(rt); + return (0); } @@ -1736,10 +1746,8 @@ change_route_one(struct rib_head *rnh, struct rt_addri if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - if (ret_nrt != NULL) { + if (ret_nrt != NULL) *ret_nrt = rt; - RT_ADDREF(rt); - } RT_UNLOCK(rt); @@ -1757,7 +1765,6 @@ static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt) { - struct epoch_tracker et; int error; /* Check if updated gateway exists */ @@ -1765,8 +1772,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo (info->rti_info[RTAX_GATEWAY] == NULL)) return (EINVAL); - NET_EPOCH_ENTER(et); - /* * route change is done in multiple steps, with dropping and * reacquiring lock. In the situations with multiple processes @@ -1779,7 +1784,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo if (error != EAGAIN) break; } - NET_EPOCH_EXIT(et); return (error); } @@ -1825,6 +1829,7 @@ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { RIB_RLOCK_TRACKER; + struct epoch_tracker et; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; @@ -1957,38 +1962,18 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fi else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; + NET_EPOCH_ENTER(et); error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ - RT_LOCK(rt); /* TODO: interface routes/aliases */ - RT_ADDREF(rt); - RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); - RT_LOCK(rt); - RT_REMREF(rt); - if (cmd == RTM_DELETE) { - /* - * If we are deleting, and we found an entry, - * then it's been removed from the tree.. - * now throw it away. - */ - RTFREE_LOCKED(rt); - } else { - if (cmd == RTM_ADD) { - /* - * We just wanted to add it.. - * we don't actually need a reference. - */ - RT_REMREF(rt); - } - RT_UNLOCK(rt); - } didwork = 1; } + NET_EPOCH_EXIT(et); if (error) a_failure = error; } Modified: head/sys/net/route.h ============================================================================== --- head/sys/net/route.h Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route.h Sat May 23 10:21:02 2020 (r361409) @@ -332,8 +332,6 @@ struct rt_addrinfo { #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) -#define RTFREE_FUNC(_rt) rtfree_func(_rt) - #define RO_NHFREE(_ro) do { \ if ((_ro)->ro_nh) { \ NH_FREE((_ro)->ro_nh); \ Modified: head/sys/net/route/nhop.h ============================================================================== --- head/sys/net/route/nhop.h Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route/nhop.h Sat May 23 10:21:02 2020 (r361409) @@ -176,6 +176,7 @@ struct rib_head; uint32_t nhop_get_idx(const struct nhop_object *nh); enum nhop_type nhop_get_type(const struct nhop_object *nh); int nhop_get_rtflags(const struct nhop_object *nh); +struct vnet *nhop_get_vnet(const struct nhop_object *nh); #endif /* _KERNEL */ Modified: head/sys/net/route/nhop_ctl.c ============================================================================== --- head/sys/net/route/nhop_ctl.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route/nhop_ctl.c Sat May 23 10:21:02 2020 (r361409) @@ -500,6 +500,9 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrin return (ENOMEM); } + /* Save vnet to ease destruction */ + nh_priv->nh_vnet = curvnet; + /* Reference external objects and calculate (referenced) ifa */ if_ref(nh->nh_ifp); ifa_ref(nh->nh_ifa); @@ -696,6 +699,13 @@ nhop_set_rtflags(struct nhop_object *nh, int rt_flags) { nh->nh_priv->rt_flags = rt_flags; +} + +struct vnet * +nhop_get_vnet(const struct nhop_object *nh) +{ + + return (nh->nh_priv->nh_vnet); } void Modified: head/sys/net/route/nhop_var.h ============================================================================== --- head/sys/net/route/nhop_var.h Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route/nhop_var.h Sat May 23 10:21:02 2020 (r361409) @@ -78,6 +78,7 @@ struct nhop_priv { struct nhop_object *nh; /* backreference to the dataplane nhop */ struct nh_control *nh_control; /* backreference to the rnh */ struct nhop_priv *nh_next; /* hash table membership */ + struct vnet *nh_vnet; /* vnet nhop belongs to */ struct epoch_context nh_epoch_ctx; /* epoch data for nhop */ }; Modified: head/sys/net/route/route_var.h ============================================================================== --- head/sys/net/route/route_var.h Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/route/route_var.h Sat May 23 10:21:02 2020 (r361409) @@ -35,6 +35,7 @@ #ifndef RNF_NORMAL #include <net/radix.h> #endif +#include <sys/epoch.h> #include <netinet/in.h> /* struct sockaddr_in */ #include <sys/counter.h> @@ -148,6 +149,7 @@ struct rtentry { #define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ + struct epoch_context rt_epoch_ctx; /* net epoch tracker */ }; #define RT_LOCK_INIT(_rt) \ Modified: head/sys/net/rtsock.c ============================================================================== --- head/sys/net/rtsock.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/net/rtsock.c Sat May 23 10:21:02 2020 (r361409) @@ -742,7 +742,6 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, } } RT_LOCK(rt); - RT_ADDREF(rt); RIB_RUNLOCK(rnh); *ret_nrt = rt; @@ -930,7 +929,6 @@ route_output(struct mbuf *m, struct socket *so, ...) #endif RT_LOCK(saved_nrt); rtm->rtm_index = saved_nrt->rt_nhop->nh_ifp->if_index; - RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; @@ -987,8 +985,7 @@ report: flush: NET_EPOCH_EXIT(et); - if (rt != NULL) - RTFREE(rt); + rt = NULL; #ifdef INET6 if (rtm != NULL) { Modified: head/sys/netinet6/nd6.c ============================================================================== --- head/sys/netinet6/nd6.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/netinet6/nd6.c Sat May 23 10:21:02 2020 (r361409) @@ -1566,14 +1566,17 @@ nd6_free_redirect(const struct llentry *ln) int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; + struct epoch_tracker et; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; + NET_EPOCH_ENTER(et); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); + NET_EPOCH_EXIT(et); } /* Modified: head/sys/netinet6/nd6_rtr.c ============================================================================== --- head/sys/netinet6/nd6_rtr.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/netinet6/nd6_rtr.c Sat May 23 10:21:02 2020 (r361409) @@ -690,10 +690,8 @@ defrouter_addreq(struct nd_defrouter *new) error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, fibnum); - if (newrt) { + if (newrt != NULL) rt_routemsg(RTM_ADD, newrt, new->ifp, 0, fibnum); - RTFREE_FUNC(newrt); - } if (error == 0) new->installed = 1; } @@ -708,6 +706,7 @@ defrouter_delreq(struct nd_defrouter *dr) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; + struct epoch_tracker et; unsigned int fibnum; bzero(&def, sizeof(def)); @@ -720,13 +719,13 @@ defrouter_delreq(struct nd_defrouter *dr) gate.sin6_addr = dr->rtaddr; fibnum = dr->ifp->if_fib; + NET_EPOCH_ENTER(et); in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, fibnum); - if (oldrt) { + if (oldrt != NULL) rt_routemsg(RTM_DELETE, oldrt, dr->ifp, 0, fibnum); - RTFREE_FUNC(oldrt); - } + NET_EPOCH_EXIT(et); dr->installed = 0; } @@ -1022,6 +1021,7 @@ defrouter_select_fib(int fibnum) } ND6_RUNLOCK(); + NET_EPOCH_ENTER(et); /* * If we selected a router for this FIB and it's different * than the installed one, remove the installed router and @@ -1037,6 +1037,7 @@ defrouter_select_fib(int fibnum) } if (selected_dr != NULL) defrouter_rele(selected_dr); + NET_EPOCH_EXIT(et); } static struct nd_defrouter * @@ -2064,7 +2065,6 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, stru pr->ndpr_stateflags |= NDPRF_ONLINK; rt_routemsg(RTM_ADD, rt, pr->ndpr_ifp, 0, fibnum); - RTFREE_FUNC(rt); } /* Return the last error we got. */ @@ -2132,7 +2132,6 @@ nd6_prefix_onlink(struct nd_prefix *pr) } /* should we care about ia6_flags? */ } - NET_EPOCH_EXIT(et); if (ifa == NULL) { /* * This can still happen, when, for example, we receive an RA @@ -2145,14 +2144,13 @@ nd6_prefix_onlink(struct nd_prefix *pr) "prefix(%s/%d) on %s\n", __func__, ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(ifp))); - return (0); + error = 0; + } else { + error = nd6_prefix_onlink_rtrequest(pr, ifa); + ifa_free(ifa); } + NET_EPOCH_EXIT(et); - error = nd6_prefix_onlink_rtrequest(pr, ifa); - - if (ifa != NULL) - ifa_free(ifa); - return (error); } @@ -2167,6 +2165,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) char ip6buf[INET6_ADDRSTRLEN]; uint64_t genid; int fibnum, maxfib, a_failure; + struct epoch_tracker et; ND6_ONLINK_LOCK_ASSERT(); ND6_UNLOCK_ASSERT(); @@ -2193,6 +2192,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) } a_failure = 0; + NET_EPOCH_ENTER(et); for (; fibnum < maxfib; fibnum++) { rt = NULL; error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, @@ -2205,8 +2205,8 @@ nd6_prefix_offlink(struct nd_prefix *pr) /* report route deletion to the routing socket. */ rt_routemsg(RTM_DELETE, rt, ifp, 0, fibnum); - RTFREE_FUNC(rt); } + NET_EPOCH_EXIT(et); error = a_failure; a_failure = 1; if (error == 0) { Modified: head/sys/nfs/bootp_subr.c ============================================================================== --- head/sys/nfs/bootp_subr.c Sat May 23 03:32:08 2020 (r361408) +++ head/sys/nfs/bootp_subr.c Sat May 23 10:21:02 2020 (r361409) @@ -1664,14 +1664,17 @@ retry: goto out; if (gctx->gotrootpath != 0) { + struct epoch_tracker et; kern_setenv("boot.netif.name", ifctx->ifp->if_xname); + NET_EPOCH_ENTER(et); bootpc_add_default_route(ifctx); error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, &nd->root_args, td); bootpc_remove_default_route(ifctx); + NET_EPOCH_EXIT(et); if (error != 0) { if (gctx->any_root_overrides == 0) panic("nfs_boot: mount root, error=%d", error); _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"