On Di, 2015-06-02 at 23:07 -0400, Andy Gospodarek wrote:
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 6f5f71f..5bd953c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2986,6 +2986,7 @@ int dev_forward_skb(struct net_device *dev, struct 
> sk_buff *skb);
>  bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
>  
>  extern int           netdev_budget;
> +extern int           kill_routes_on_linkdown;

Would it make sense to make it per netns?

>  
>  /* Called by rtnetlink.c:rtnl_unlock() */
>  void netdev_run_todo(void);
> diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
> index 6d67383..4fbfda5 100644
> --- a/include/net/fib_rules.h
> +++ b/include/net/fib_rules.h
> @@ -37,6 +37,7 @@ struct fib_lookup_arg {
>       struct fib_rule         *rule;
>       int                     flags;
>  #define FIB_LOOKUP_NOREF     1
> +#define FIB_LOOKUP_ALLOWDEAD 2
>  };
>  
>  struct fib_rules_ops {
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 54271ed..efb195b 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -250,6 +250,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id);
>  struct fib_table *fib_get_table(struct net *net, u32 id);
>  
>  int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result 
> *res);
> +int fib_lookup_flags(struct net *n, struct flowi4 *flp, struct fib_result 
> *res, int flags);
>  
>  static inline int fib_lookup(struct net *net, struct flowi4 *flp,
>                            struct fib_result *res)
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 17fb02f..bc264d4 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -338,6 +338,7 @@ struct rtnexthop {
>  #define RTNH_F_PERVASIVE     2       /* Do recursive gateway lookup  */
>  #define RTNH_F_ONLINK                4       /* Gateway is forced on link    
> */
>  #define RTNH_F_OFFLOAD               8       /* offloaded route */
> +#define RTNH_F_DEAD_LINKDOWN 16      /* Route dead due to carrier down */
>  
>  /* Macros to handle hexthops */
>  

-- >8 --

> diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
> index 0956373..b6632dc 100644
> --- a/include/uapi/linux/sysctl.h
> +++ b/include/uapi/linux/sysctl.h
> @@ -276,6 +276,7 @@ enum
>       NET_CORE_AEVENT_ETIME=20,
>       NET_CORE_AEVENT_RSEQTH=21,
>       NET_CORE_WARNINGS=22,
> +     NET_CORE_KILL_ROUTES_ON_LINKDOWN=23,
>  };
>  
>  /* /proc/sys/net/ethernet */
> diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
> index 7e7746a..f319116 100644
> --- a/kernel/sysctl_binary.c
> +++ b/kernel/sysctl_binary.c
> @@ -196,6 +196,7 @@ static const struct bin_table bin_net_core_table[] = {
>       { CTL_INT,      NET_CORE_AEVENT_ETIME,  "xfrm_aevent_etime" },
>       { CTL_INT,      NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" },
>       { CTL_INT,      NET_CORE_WARNINGS,      "warnings" },
> +     { CTL_INT,      NET_CORE_KILL_ROUTES_ON_LINKDOWN,       
> "kill_routes_on_linkdown" },
>       {},
>  };

-- >8 --

I don't think we need this hunk any more as we don't update binary
sysctls anymore.

>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 0602e91..50dc19c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3167,6 +3167,8 @@ EXPORT_SYMBOL(netdev_max_backlog);
>  int netdev_tstamp_prequeue __read_mostly = 1;
>  int netdev_budget __read_mostly = 300;
>  int weight_p __read_mostly = 64;            /* old backlog weight */
> +int kill_routes_on_linkdown = 0;
> +EXPORT_SYMBOL(kill_routes_on_linkdown);
>  
>  /* Called with irq disabled */
>  static inline void ____napi_schedule(struct softnet_data *sd,
> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index 95b6139..fef1804 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -392,6 +392,13 @@ static struct ctl_table net_core_table[] = {
>               .mode           = 0644,
>               .proc_handler   = proc_dointvec
>       },
> +     {
> +             .procname       = "kill_routes_on_linkdown",
> +             .data           = &kill_routes_on_linkdown,
> +             .maxlen         = sizeof(int),
> +             .mode           = 0644,
> +             .proc_handler   = proc_dointvec
> +     },
>       { }
>  };
>  
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 872494e..94348c7 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1107,6 +1107,7 @@ static int fib_netdev_event(struct notifier_block 
> *this, unsigned long event, vo
>       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
>       struct in_device *in_dev;
>       struct net *net = dev_net(dev);
> +     unsigned flags;
>  
>       if (event == NETDEV_UNREGISTER) {
>               fib_disable_ip(dev, 2);
> @@ -1130,10 +1131,17 @@ static int fib_netdev_event(struct notifier_block 
> *this, unsigned long event, vo
>               rt_cache_flush(net);
>               break;
>       case NETDEV_DOWN:
> -             fib_disable_ip(dev, 0);
> +             fib_disable_ip(dev, 1);

Shouldn't this depend on the sysctl?

>               break;
> -     case NETDEV_CHANGEMTU:
>       case NETDEV_CHANGE:
> +             if (kill_routes_on_linkdown) {
> +                     flags = dev_get_flags(dev);
> +                     if (flags & (IFF_RUNNING|IFF_LOWER_UP))
> +                             fib_sync_up(dev);
> +                     else
> +                             fib_sync_down_dev(dev, 0);
> +             }
> +     case NETDEV_CHANGEMTU:
>               rt_cache_flush(net);
>               break;
>       }
> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
> index 5615198..d135ec9 100644
> --- a/net/ipv4/fib_rules.c
> +++ b/net/ipv4/fib_rules.c
> @@ -49,9 +49,14 @@ struct fib4_rule {
>  
>  int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
>  {
> +     return fib_lookup_flags(net, flp, res, 0);
> +}
>
> +int fib_lookup_flags(struct net *net, struct flowi4 *flp, struct fib_result 
> *res, int flags)
> +{
>       struct fib_lookup_arg arg = {
>               .result = res,
> -             .flags = FIB_LOOKUP_NOREF,
> +             .flags = FIB_LOOKUP_NOREF | flags,
>       };
>       int err;

Can fib_lookup take the flags argument always without adding a new
wrapper?
 
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 28ec3c1..c0874ee 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, 
> const struct fib_info *ofi)
>  #ifdef CONFIG_IP_ROUTE_CLASSID
>                   nh->nh_tclassid != onh->nh_tclassid ||
>  #endif
> -                 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
> +                 ((nh->nh_flags ^ onh->nh_flags) & 
> ~(RTNH_F_DEAD|RTNH_F_DEAD_LINKDOWN)))
>                       return -1;
>               onh++;
>       } endfor_nexthops(fi);
> @@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct 
> fib_info *nfi)
>                   nfi->fib_type == fi->fib_type &&
>                   memcmp(nfi->fib_metrics, fi->fib_metrics,
>                          sizeof(u32) * RTAX_MAX) == 0 &&
> -                 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
> +                 ((nfi->fib_flags ^ fi->fib_flags) & 
> ~(RTNH_F_DEAD|RTNH_F_DEAD_LINKDOWN)) == 0 &&
>                   (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
>                       return fi;
>       }
> @@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct 
> fib_info *fi,
>                               return -ENODEV;
>                       if (!(dev->flags & IFF_UP))
>                               return -ENETDOWN;
> +                     if (!netif_carrier_ok(dev) && kill_routes_on_linkdown)
> +                             nh->nh_flags |= RTNH_F_DEAD;
>                       nh->nh_dev = dev;
>                       dev_hold(dev);
>                       nh->nh_scope = RT_SCOPE_LINK;
> @@ -621,7 +623,7 @@ static int fib_check_nh(struct fib_config *cfg, struct 
> fib_info *fi,
>                       /* It is not necessary, but requires a bit of thinking 
> */
>                       if (fl4.flowi4_scope < RT_SCOPE_LINK)
>                               fl4.flowi4_scope = RT_SCOPE_LINK;
> -                     err = fib_lookup(net, &fl4, &res);
> +                     err = fib_lookup_flags(net, &fl4, &res, 
> FIB_LOOKUP_ALLOWDEAD);
>                       if (err) {
>                               rcu_read_unlock();
>                               return err;
> @@ -636,6 +638,8 @@ static int fib_check_nh(struct fib_config *cfg, struct 
> fib_info *fi,
>               if (!dev)
>                       goto out;
>               dev_hold(dev);
> +             if (!netif_carrier_ok(dev) && kill_routes_on_linkdown)
> +                     nh->nh_flags |= RTNH_F_DEAD;
>               err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
>       } else {
>               struct in_device *in_dev;
> @@ -654,6 +658,8 @@ static int fib_check_nh(struct fib_config *cfg, struct 
> fib_info *fi,
>               nh->nh_dev = in_dev->dev;
>               dev_hold(nh->nh_dev);
>               nh->nh_scope = RT_SCOPE_HOST;
> +             if (!netif_carrier_ok(nh->nh_dev) && kill_routes_on_linkdown)
> +                     nh->nh_flags |= RTNH_F_DEAD;
>               err = 0;
>       }
>  out:
> @@ -760,6 +766,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
>       struct fib_info *ofi;
>       int nhs = 1;
>       struct net *net = cfg->fc_nlinfo.nl_net;
> +     int dead;
>  
>       if (cfg->fc_type > RTN_MAX)
>               goto err_inval;
> @@ -920,11 +927,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
>               if (!nh->nh_dev)
>                       goto failure;
>       } else {
> +             dead = 0;
>               change_nexthops(fi) {
>                       err = fib_check_nh(cfg, fi, nexthop_nh);
>                       if (err != 0)
>                               goto failure;
> +                     if (nexthop_nh->nh_flags & RTNH_F_DEAD)
> +                             dead++;
>               } endfor_nexthops(fi)
> +             if ((dead == fi->fib_nhs)) {
> +                     fi->fib_flags |= RTNH_F_DEAD;
> +                     fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +             }
>       }
>  
>       if (fi->fib_prefsrc) {
> @@ -1097,6 +1111,8 @@ int fib_sync_down_addr(struct net *net, __be32 local)
>                       continue;
>               if (fi->fib_prefsrc == local) {
>                       fi->fib_flags |= RTNH_F_DEAD;
> +                     /* Addr is gone, more serious than a linkdown */
> +                     fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>                       ret++;
>               }
>       }
> @@ -1112,7 +1128,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
>       struct hlist_head *head = &fib_info_devhash[hash];
>       struct fib_nh *nh;
>  
> -     if (force)
> +     if (force > 1)
>               scope = -1;

Why was this changed? I think force can be made a boolean.

>  
>       hlist_for_each_entry(nh, head, nh_hash) {
> @@ -1147,6 +1163,11 @@ int fib_sync_down_dev(struct net_device *dev, int 
> force)
>               } endfor_nexthops(fi)
>               if (dead == fi->fib_nhs) {
>                       fi->fib_flags |= RTNH_F_DEAD;
> +                     /* force marks route down due to admin down and device 
> removal. */
> +                     if (!force)
> +                             fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +                     else
> +                             fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>                       ret++;
>               }
>       }
> @@ -1223,10 +1244,12 @@ int fib_sync_up(struct net_device *dev)
>       struct hlist_head *head;
>       struct fib_nh *nh;
>       int ret;
> +     int link_up;
>  
>       if (!(dev->flags & IFF_UP))
>               return 0;
>  
> +     link_up = netif_carrier_ok(dev);
>       prev_fi = NULL;
>       hash = fib_devindex_hashfn(dev->ifindex);
>       head = &fib_info_devhash[hash];
> @@ -1253,16 +1276,27 @@ int fib_sync_up(struct net_device *dev)
>                       if (nexthop_nh->nh_dev != dev ||
>                           !__in_dev_get_rtnl(dev))
>                               continue;
> -                     alive++;
> +                     if (link_up) {
> +                             /* Link is up, so mark NH as alive */
> +                             nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
> +                             alive++;
> +                     } else
> +                             nexthop_nh->nh_flags |= RTNH_F_DEAD;
> +
>                       spin_lock_bh(&fib_multipath_lock);
>                       nexthop_nh->nh_power = 0;
> -                     nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
>                       spin_unlock_bh(&fib_multipath_lock);
>               } endfor_nexthops(fi)
>  
>               if (alive > 0) {
> +                     /* Some NHs are alive, unmark the route as dead */
> +                     fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>                       fi->fib_flags &= ~RTNH_F_DEAD;
>                       ret++;
> +             } else {
> +                     /* No NHs are alive, mark the route as dead */
> +                     fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +                     fi->fib_flags |= RTNH_F_DEAD;
>               }
>       }
>  
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index 01bce15..eedf287 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -1401,12 +1401,18 @@ found:
>  #endif
>                       return err;
>               }
> -             if (fi->fib_flags & RTNH_F_DEAD)
> -                     continue;
> +             if (!(fib_flags & FIB_LOOKUP_ALLOWDEAD)) {
> +                     /* if route is dead and link is down, keep looking  */
> +                     if ((fi->fib_flags & RTNH_F_DEAD) &&
> +                         (fi->fib_flags & RTNH_F_DEAD_LINKDOWN))
> +                             continue;
> +             }
>               for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
>                       const struct fib_nh *nh = &fi->fib_nh[nhsel];
>  
> -                     if (nh->nh_flags & RTNH_F_DEAD)
> +                     /* allow next-hop to be added if link is down */
> +                     if ((nh->nh_flags & RTNH_F_DEAD) &&
> +                         !(fi->fib_flags & RTNH_F_DEAD_LINKDOWN))
>                               continue;
>                       if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
>                               continue;
> @@ -1829,7 +1835,12 @@ int fib_table_flush(struct fib_table *tb)
>               hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
>                       struct fib_info *fi = fa->fa_info;
>  
> -                     if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
> +                     /* DEAD and DEAD_LINKDOWN will not both be set
> +                      * with IFF_UP is cleared, so do not flush
> +                      * entries with only DEAD set
> +                      */
> +                     if (!fi || !((fi->fib_flags & RTNH_F_DEAD) &&
> +                         !(fi->fib_flags & RTNH_F_DEAD_LINKDOWN))) {
>                               slen = fa->fa_slen;
>                               continue;
>                       }

Otherwise looks good, thanks!

Bye,
Hannes


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to