On 12/31/17 9:15 AM, Ido Schimmel wrote: > By default, IPv6 deletes nexthops from a multipath route when the > nexthop device is put administratively down. This differs from IPv4 > where the nexthops are kept, but marked with the RTNH_F_DEAD flag. A > multipath route is flushed when all of its nexthops become dead. > > Align IPv6 with IPv4 and have it conform to the same guidelines. > > In case the multipath route needs to be flushed, its siblings are > flushed one by one. Otherwise, the nexthops are marked with the > appropriate flags and the tree walker is instructed to skip all the > siblings. > > As explained in previous patches, care is taken to update the sernum of > the affected tree nodes, so as to prevent the use of wrong dst entries. > > Signed-off-by: Ido Schimmel <ido...@mellanox.com> > --- > net/ipv6/route.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 46 insertions(+), 7 deletions(-) > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > index ab95e07b5e74..9b142eb83f3c 100644 > --- a/net/ipv6/route.c > +++ b/net/ipv6/route.c > @@ -3486,8 +3486,10 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg) > const struct arg_netdev_event *arg = p_arg; > const struct net *net = dev_net(arg->dev); > > - if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) > + if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { > rt->rt6i_nh_flags &= ~arg->nh_flags; > + fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); > + } > > return 0; > } > @@ -3528,6 +3530,35 @@ static void rt6_multipath_flush(struct rt6_info *rt) > iter->should_flush = 1; > } > > +static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, > + const struct net_device *down_dev) > +{ > + struct rt6_info *iter; > + unsigned int dead = 0; > + > + if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) > + dead++; > + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) > + if (iter->dst.dev == down_dev || > + iter->rt6i_nh_flags & RTNH_F_DEAD) > + dead++; > + > + return dead; > +} > + > +static void rt6_multipath_nh_flags_set(struct rt6_info *rt, > + const struct net_device *dev, > + unsigned int nh_flags) > +{ > + struct rt6_info *iter; > + > + if (rt->dst.dev == dev) > + rt->rt6i_nh_flags |= nh_flags; > + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) > + if (iter->dst.dev == dev) > + iter->rt6i_nh_flags |= nh_flags; > +} > + > /* called with write lock held for table with rt */ > static int fib6_ifdown(struct rt6_info *rt, void *p_arg) > { > @@ -3550,13 +3581,21 @@ static int fib6_ifdown(struct rt6_info *rt, void > *p_arg) > } > return -2; > case NETDEV_DOWN: > - if (rt->dst.dev != dev) > - break; > - if (rt->rt6i_nsiblings == 0 || > - !rt->rt6i_idev->cnf.ignore_routes_with_linkdown) > + if (rt->should_flush) > return -1; > - rt->rt6i_nh_flags |= (RTNH_F_DEAD | RTNH_F_LINKDOWN); > - break; > + if (!rt->rt6i_nsiblings) > + return rt->dst.dev == dev ? -1 : 0; > + if (rt6_multipath_uses_dev(rt, dev)) { > + if (rt->rt6i_nsiblings + 1 == > + rt6_multipath_dead_count(rt, dev)) {
I'd prefer a tmp variable to make that line more readable and still within the 80 column guideline. unsigned int count; count = rt6_multipath_dead_count(rt, dev); if (rt->rt6i_nsiblings + 1 == count) { > + rt6_multipath_flush(rt); > + return -1; > + } > + rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | > + RTNH_F_LINKDOWN); > + fib6_update_sernum(rt); > + } > + return -2; > case NETDEV_CHANGE: > if (rt->dst.dev != dev || > rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) >