From: Roopa Prabhu <ro...@cumulusnetworks.com> Add support for both the RTNH_F_DEAD and RTNH_F_LINKDOWN flags on MPLS nexthops. This resembles the IPv4 FIB code. I also adapted fib_rebalance from IPv4 and enabled weight support for nexthops, since the infrastructure is already there.
Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> --- I want to get this in before net-next closes, as promised. I have tested it for the dead/linkdown flags. The multipath selection and hash calculation in the face of dead routes need some more work. I am short on cycles this week and wanted to get some early feedback, hence sending this out as an RFC. I will continue with some more testing. Robert, I am using your hash algorithm, but it needs some more work to handle dead routes. If you already have any thoughts on this, I will take them. Thanks! net/mpls/af_mpls.c | 228 +++++++++++++++++++++++++++++++++++++++++++++------- net/mpls/internal.h | 4 + 2 files changed, 202 insertions(+), 30 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750..7db9678 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -27,6 +27,8 @@ */ #define MAX_MP_SELECT_LABELS 4 +u32 mpls_multipath_secret __read_mostly; + static int zero = 0; static int label_limit = (1 << 20) - 1; @@ -96,22 +98,52 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static void mpls_multipath_rebalance(struct mpls_route *rt) +{ + int total; + int w; + + if (rt->rt_nhn < 2) + return; + + total = 0; + for_nexthops(rt) { + if ((nh->nh_flags & RTNH_F_DEAD) || + (nh->nh_flags & RTNH_F_LINKDOWN)) + continue; + + total += nh->nh_weight; + } endfor_nexthops(rt); + + w = 0; + change_nexthops(rt) { + int upper_bound; + + if ((nh->nh_flags & RTNH_F_DEAD) || + (nh->nh_flags & RTNH_F_LINKDOWN)) { + upper_bound = -1; + } else { + w += nh->nh_weight; + upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, + total) - 1; + } + + atomic_set(&nh->nh_upper_bound, upper_bound); + } endfor_nexthops(rt); + + net_get_random_once(&mpls_multipath_secret, + sizeof(mpls_multipath_secret)); +} + +static u32 mpls_multipath_hash(struct mpls_route *rt, + 
struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only - * one path - */ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,9 +197,29 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + u32 hash = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + hash = mpls_multipath_hash(rt, skb, bos); + for_nexthops(rt) { + if (hash > atomic_read(&nh->nh_upper_bound)) + continue; + return nh; + } endfor_nexthops(rt); + out: - return &rt->rt_nh[nh_index]; + return &rt->rt_nh[0]; } static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, @@ -577,7 +629,7 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, - struct mpls_nh *nh, int oif, + struct mpls_nh *nh, int oif, int hops, struct nlattr *via, struct nlattr *newdst) { int err = -ENOMEM; @@ -597,6 +649,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, if (err) goto errout; + nh->nh_weight = hops + 1; err = mpls_nh_assign_dev(net, rt, nh, oif); if (err) goto errout; @@ -663,10 +716,9 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, if (!rtnh_ok(rtnh, remaining)) goto errout; - /* neither weighted multipath nor any flags - * are supported + /* flags are not supported */ - if (rtnh->rtnh_hops || rtnh->rtnh_flags) + if (rtnh->rtnh_flags) goto errout; attrlen = rtnh_attrlen(rtnh); @@ -681,8 +733,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - 
rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, rtnh->rtnh_hops, + nla_via, nla_newdst); if (err) goto errout; @@ -875,34 +927,111 @@ free: return ERR_PTR(err); } -static void mpls_ifdown(struct net_device *dev) +static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - struct mpls_dev *mdev; unsigned index; + int dead; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + int changed = 0; + if (!rt) continue; + dead = 0; for_nexthops(rt) { + if ((event == NETDEV_DOWN && + (nh->nh_flags & RTNH_F_DEAD)) || + (event == NETDEV_CHANGE && + (nh->nh_flags & RTNH_F_LINKDOWN))) { + dead++; + continue; + } + if (rtnl_dereference(nh->nh_dev) != dev) continue; - nh->nh_dev = NULL; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nh->nh_flags |= RTNH_F_LINKDOWN; + changed = 1; + break; + } + if (event == NETDEV_UNREGISTER) { + nh->nh_dev = NULL; + dead = rt->rt_nhn; + changed = 1; + break; + } + dead++; } endfor_nexthops(rt); + + if (dead == rt->rt_nhn) { + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + rt->rt_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + rt->rt_flags |= RTNH_F_LINKDOWN; + changed = 1; + break; + } + } + + if (changed) + mpls_multipath_rebalance(rt); } - mdev = mpls_dev_get(dev); - if (!mdev) - return; + return; +} + +static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + int alive; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + int changed = 
0; + + if (!rt) + continue; + alive = 0; + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!(nh->nh_flags & nh_flags)) { + alive++; + continue; + } + if (nh_dev != dev) + continue; + alive++; + nh->nh_flags &= ~nh_flags; + changed = 1; + } endfor_nexthops(rt); - mpls_dev_sysctl_unregister(mdev); + if (alive > 0) { + rt->rt_flags &= ~nh_flags; + changed = 1; + } - RCU_INIT_POINTER(dev->mpls_ptr, NULL); + if (changed) + mpls_multipath_rebalance(rt); + } - kfree_rcu(mdev, rcu); + return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, @@ -910,9 +1039,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; + unsigned int flags; - switch(event) { - case NETDEV_REGISTER: + if (event == NETDEV_REGISTER) { /* For now just support ethernet devices */ if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) { @@ -920,10 +1049,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); } - break; + return NOTIFY_OK; + } + mdev = mpls_dev_get(dev); + if (!mdev) + return NOTIFY_OK; + + switch (event) { + case NETDEV_DOWN: + mpls_ifdown(dev, event); + break; + case NETDEV_UP: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifup(dev, RTNH_F_DEAD); + break; + case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifdown(dev, event); + break; case NETDEV_UNREGISTER: - mpls_ifdown(dev); + mpls_ifdown(dev, event); + mdev = mpls_dev_get(dev); + if (mdev) { + mpls_dev_sysctl_unregister(mdev); + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + kfree_rcu(mdev, rcu); + } break; case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); @@ -1237,6 
+1395,10 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtm->rtm_flags |= RTNH_F_DEAD; } else { struct rtnexthop *rtnh; struct nlattr *mp; @@ -1253,6 +1415,12 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev) rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtnh->rtnh_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtnh->rtnh_flags |= RTNH_F_DEAD; + + rtnh->rtnh_hops = nh->nh_weight - 1; if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bde52ce..7014032 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -41,6 +41,9 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + unsigned int nh_flags; + int nh_weight; + atomic_t nh_upper_bound; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; @@ -70,6 +73,7 @@ struct mpls_nh { /* next hop label forwarding entry */ */ struct mpls_route { /* next hop label forwarding entry */ struct rcu_head rt_rcu; + unsigned int rt_flags; u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html