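From: David Ahern <dsah...@gmail.com>

Allow users to specify a nexthop id (the RTA_NH_ID attribute) to use with an IPv6 route. A route created this way holds a reference to the nexthop object instead of initializing its own embedded fib6_nh.
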
Signed-off-by: David Ahern <dsah...@gmail.com> --- include/net/ip6_fib.h | 4 +++ include/net/nexthop.h | 3 ++ net/ipv4/nexthop.c | 5 +++ net/ipv6/addrconf.c | 3 ++ net/ipv6/ip6_fib.c | 17 ++++++++--- net/ipv6/ndisc.c | 2 ++ net/ipv6/route.c | 85 +++++++++++++++++++++++++++++++++++++++++---------- 7 files changed, 98 insertions(+), 21 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1f04a26e4c65..170aadcd83b4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -52,6 +52,7 @@ struct fib6_config { u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, __unused : 15; + u32 fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -139,6 +140,8 @@ struct fib6_info { struct fib6_info __rcu *fib6_next; struct fib6_node __rcu *fib6_node; + struct list_head nh_list; + /* Multipath routes: * siblings is a list of fib6_info that have the the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache @@ -171,6 +174,7 @@ struct fib6_info { unused:3; struct rcu_head rcu; + struct nexthop *nh; struct fib6_nh fib6_nh[0]; }; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index dae1518af3f3..759bb39e4ea7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -175,6 +175,9 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) static inline struct fib6_nh *fib6_info_nh(struct fib6_info *f6i) { + if (f6i->nh) + return nexthop_fib6_nh(f6i->nh); + return f6i->fib6_nh; } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d1fc3d21af86..1e77fa94e562 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -317,6 +317,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { + struct fib6_info *f6i, *tmp; struct fib_info *fi; bool do_flush; @@ -328,6 +329,10 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) if (do_flush) fib_flush(net); + + 
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { + ip6_del_rt(net, f6i); + } } /* called on insert failure too */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index da5102bff2a9..8131cdd472cb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2366,6 +2366,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { + /* prefix routes do not use nexthop objects */ + if (rt->nh) + continue; if (rt->fib6_nh->nh_dev->ifindex != dev->ifindex) continue; if ((rt->fib6_flags & flags) != flags) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5b0ca5b3710d..b6dc644a55cf 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -202,7 +202,10 @@ void fib6_info_destroy_rcu(struct rcu_head *head) } } - fib6_nh_release(f6i->fib6_nh); + if (f6i->nh) + nexthop_put(f6i->nh); + else + fib6_nh_release(f6i->fib6_nh); m = f6i->fib6_metrics; if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) @@ -1302,6 +1305,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!err) { __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); + if (rt->nh) + list_add(&rt->nh_list, &rt->nh->f6i_list); } out: @@ -1776,6 +1781,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fib6_purge_rt(rt, fn, net); + if (rt->nh) + list_del(&rt->nh_list); + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); @@ -2251,7 +2259,6 @@ void fib6_gc_cleanup(void) static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; - struct fib6_nh *fib6_nh = rt->fib6_nh; struct ipv6_route_iter *iter = seq->private; const struct net_device *dev; @@ -2262,12 +2269,12 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_flags & RTF_GATEWAY) - 
seq_printf(seq, "%pi6", &fib6_nh->nh_gw); + if (!rt->nh && rt->fib6_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->fib6_nh->nh_gw); else seq_puts(seq, "00000000000000000000000000000000"); - dev = fib6_nh->nh_dev; + dev = rt->nh ? NULL : rt->fib6_nh->nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, rt->fib6_flags, dev ? dev->name : ""); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4bc47b9db35b..1a6b71873dd3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1277,6 +1277,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { + /* routes added from RAs do not use nexthop objects */ fib6_nh = rt->fib6_nh; neigh = ip6_neigh_lookup(&fib6_nh->nh_gw, fib6_nh->nh_dev, NULL, &ipv6_hdr(skb)->saddr); @@ -1307,6 +1308,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } + /* routes added from RAs do not use nexthop objects */ fib6_nh = rt->fib6_nh; neigh = ip6_neigh_lookup(&fib6_nh->nh_gw, fib6_nh->nh_dev, NULL, &ipv6_hdr(skb)->saddr); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2c140ce95eb4..217be2c72b69 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -439,6 +439,11 @@ struct fib6_info *fib6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); + if (match->nh) { + // TO-DO: + return match; + } + if (fl6->mp_hash <= atomic_read(&match->fib6_nh->nh_upper_bound)) return match; @@ -661,13 +666,15 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, int m; bool match_do_rr = false; - if (rt->fib6_nh->nh_flags & RTNH_F_DEAD) - goto out; + if (!rt->nh) { + if (rt->fib6_nh->nh_flags & RTNH_F_DEAD) + goto out; - if (fib6_ignore_linkdown(rt) && - rt->fib6_nh->nh_flags & RTNH_F_LINKDOWN && - !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) - goto out; + if (fib6_ignore_linkdown(rt) && + rt->fib6_nh->nh_flags & 
RTNH_F_LINKDOWN && + !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) + goto out; + } if (fib6_check_expired(rt)) goto out; @@ -3064,6 +3071,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; + struct nexthop *nh = NULL; struct fib6_table *table; int err = -EINVAL; @@ -3099,6 +3107,15 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } #endif + if (cfg->fc_nh_id) { + nh = nexthop_find_by_id(net, cfg->fc_nh_id); + if (!nh) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop id - nexthop does not exist"); + goto out; + } + } + if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; @@ -3118,7 +3135,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; err = -ENOMEM; - rt = fib6_info_alloc(gfp_flags, true); + rt = fib6_info_alloc(gfp_flags, !nh); if (!rt) goto out; @@ -3152,9 +3169,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - err = fib6_nh_init(net, rt->fib6_nh, cfg, extack); - if (err) - goto out; + if (nh) { + nexthop_get(nh); + rt->nh = nh; + if (nexthop_has_gw(nh)) + cfg->fc_flags |= RTF_GATEWAY; + } else { + err = fib6_nh_init(net, rt->fib6_nh, cfg, extack); + if (err) + goto out; + } if (!ipv6_addr_any(&cfg->fc_prefsrc)) { struct net_device *dev = fib6_info_nh_dev(rt); @@ -3327,6 +3351,9 @@ static int ip6_route_del(struct fib6_config *cfg, } continue; } + if (rt->nh && rt->nh->id == cfg->fc_nh_id) + goto del_rt; + if (cfg->fc_ifindex && (!rt->fib6_nh->nh_dev || rt->fib6_nh->nh_dev->ifindex != cfg->fc_ifindex)) @@ -3340,6 +3367,7 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (!fib6_info_hold_safe(rt)) continue; +del_rt: rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3482,6 +3510,7 @@ static void 
rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } #ifdef CONFIG_IPV6_ROUTE_INFO +/* RA routes do not use nexthop objects */ static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, @@ -3551,6 +3580,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net, } #endif +/* RA routes do not use nexthop objects */ struct fib6_info *rt6_get_dflt_router(struct net *net, const struct in6_addr *addr, struct net_device *dev) @@ -3892,6 +3922,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) return NULL; } +/* not called for rt->nh set */ static bool rt6_is_dead(const struct fib6_info *rt) { if (rt->fib6_nh->nh_flags & RTNH_F_DEAD || @@ -3970,7 +4001,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && + if (rt != net->ipv6.fib6_null_entry && !rt->nh && rt->fib6_nh->nh_dev == arg->dev) { rt->fib6_nh->nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); @@ -4179,6 +4210,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, + [RTA_NH_ID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4224,6 +4256,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); + if (tb[RTA_NH_ID]) + cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]); + if (tb[RTA_GATEWAY]) { cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; @@ -4421,6 +4456,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); + nla = nla_find(attrs, attrlen, 
RTA_NH_ID); + if (nla) { + err = -EINVAL; + NL_SET_ERR_MSG(extack, + "Multipath API can not use nexthop objects."); + goto cleanup; + } } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); @@ -4596,6 +4638,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; + size_t nh_len; if (rt->fib6_nsiblings) { nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ @@ -4606,23 +4649,29 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) nexthop_len *= rt->fib6_nsiblings; } + if (rt->nh) { + nh_len = nla_total_size(4); /* RTA_NH_ID */ + } else { + nh_len = lwtunnel_get_encap_size(rt->fib6_nh->nh_lwtstate) + + nla_total_size(16) /* RTA_GATEWAY */ + + nla_total_size(4); /* RTA_OIF */ + } + return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ - + nla_total_size(16) /* RTA_GATEWAY */ + nla_total_size(16) /* RTA_PREFSRC */ + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_IIF */ - + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh->nh_lwtstate) - + nexthop_len; + + nexthop_len + nh_len; } +/* not called for rt->nh set */ static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, unsigned int *flags, bool skip_oif) { @@ -4777,10 +4826,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) goto nla_put_failure; + if (rt->nh) { + if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) + goto nla_put_failure; + /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. 
*/ - if (rt->fib6_nsiblings) { + } else if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; struct nlattr *mp; -- 2.11.0