From: Roopa Prabhu <ro...@cumulusnetworks.com> Adds support for RTNH_F_DEAD and RTNH_F_LINKDOWN flags on mpls routes due to link events. Also adds code to ignore dead routes during route selection
Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> --- Dave, I know you are only taking bug fixes currently. This patch is borderline a bug fix because Eric thinks it is critical for mpls multipath routes. I can sure resubmit it as a bug fix against net when it is time if you did prefer that. Thanks! RFC to v1: Addressed a few comments from Eric and Robert: - remove support for weighted nexthops - Use rt_nhn_alive in the rt structure to keep count of alive routes. What i have not done is: sort nexthops on link events. I am not comfortable recreating or sorting nexthops on every carrier change. This leaves scope for optimizing in the future v1 to v2: Fix dead nexthop checks as suggested by dave v2 to v3: Fix duplicated argument reported by kbuild test robot net/mpls/af_mpls.c | 189 ++++++++++++++++++++++++++++++++++++++++++++-------- net/mpls/internal.h | 3 + 2 files changed, 165 insertions(+), 27 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750..8054904 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -96,22 +96,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, + struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only - * one path - */ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,9 +158,37 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + u32 hash = 0; + int nh_index; + int n = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + if (rt->rt_nhn_alive <= 0) + return NULL; + + hash = mpls_multipath_hash(rt, skb, bos); + nh_index = hash % rt->rt_nhn_alive; + for_nexthops(rt) { + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + continue; + if (n == nh_index) + return nh; + n++; + } endfor_nexthops(rt); + out: - return &rt->rt_nh[nh_index]; + return &rt->rt_nh[0]; } static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, @@ -365,6 +386,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; rt->rt_max_alen = max_alen_aligned; } @@ -536,6 +558,15 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, RCU_INIT_POINTER(nh->nh_dev, dev); + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; + + if (!(dev->flags & IFF_UP)) + nh->nh_flags |= RTNH_F_DEAD; + + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + return 0; errout: @@ -577,7 +608,7 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, - struct mpls_nh *nh, int oif, + struct mpls_nh *nh, int oif, int hops, struct nlattr *via, struct nlattr *newdst) { int err = -ENOMEM; @@ -681,8 +712,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, rtnh->rtnh_hops, + nla_via, nla_newdst); if (err) goto errout; @@ -875,34 +906,100 @@ free: return ERR_PTR(err); } -static void mpls_ifdown(struct net_device *dev) +static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - struct mpls_dev *mdev; unsigned index; + int dead; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) continue; + dead = 0; for_nexthops(rt) { + if ((event == NETDEV_DOWN && + (nh->nh_flags & RTNH_F_DEAD)) || + (event == NETDEV_CHANGE && + (nh->nh_flags & RTNH_F_LINKDOWN))) { + dead++; + continue; + } + if (rtnl_dereference(nh->nh_dev) != dev) continue; - nh->nh_dev = NULL; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nh->nh_flags |= RTNH_F_LINKDOWN; + rt->rt_nhn_alive--; + break; + } + if (event == NETDEV_UNREGISTER) { + nh->nh_dev = NULL; + dead = rt->rt_nhn; + break; + } + dead++; } endfor_nexthops(rt); + + if (dead == rt->rt_nhn) { + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + rt->rt_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + rt->rt_flags |= RTNH_F_LINKDOWN; + rt->rt_nhn_alive = 0; + break; + } + } } - mdev = mpls_dev_get(dev); - if (!mdev) - return; + return; +} + +static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + int alive; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + + if (!rt) + continue; + alive = 0; + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); - mpls_dev_sysctl_unregister(mdev); + if (!(nh->nh_flags & nh_flags)) { + alive++; + continue; + } + if (nh_dev != dev) + continue; + alive++; + nh->nh_flags &= ~nh_flags; + } endfor_nexthops(rt); - RCU_INIT_POINTER(dev->mpls_ptr, NULL); + if (alive > 0) + rt->rt_flags &= ~nh_flags; + rt->rt_nhn_alive = alive; + } - kfree_rcu(mdev, rcu); + return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, @@ -910,9 +1007,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; + unsigned int flags; - switch(event) { - case NETDEV_REGISTER: + if (event == NETDEV_REGISTER) { /* For now just support ethernet devices */ if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) { @@ -920,10 +1017,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); } - break; + return NOTIFY_OK; + } + mdev = mpls_dev_get(dev); + if (!mdev) + return NOTIFY_OK; + + switch (event) { + case NETDEV_DOWN: + mpls_ifdown(dev, event); + break; + case NETDEV_UP: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifup(dev, RTNH_F_DEAD); + break; + case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifdown(dev, event); + break; case NETDEV_UNREGISTER: - mpls_ifdown(dev); + mpls_ifdown(dev, event); + mdev = mpls_dev_get(dev); + if (mdev) { + mpls_dev_sysctl_unregister(mdev); + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + kfree_rcu(mdev, rcu); + } break; case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); @@ -1237,6 +1363,10 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtm->rtm_flags |= RTNH_F_DEAD; } else { struct rtnexthop *rtnh; struct nlattr *mp; @@ -1253,6 +1383,11 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev) rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtnh->rtnh_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtnh->rtnh_flags |= RTNH_F_DEAD; + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bde52ce..4f9bf2b 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -41,6 +41,7 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + unsigned int nh_flags; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; @@ -70,10 +71,12 @@ struct mpls_nh { /* next hop label forwarding entry */ */ struct mpls_route { /* next hop label forwarding entry */ struct rcu_head rt_rcu; + unsigned int rt_flags; u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; unsigned int rt_nhn; + unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; }; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html