On Tue, Apr 4, 2017 at 7:45 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
> On Tue, 2017-04-04 at 18:11 -0700, Cong Wang wrote:
>> On Tue, Apr 4, 2017 at 11:51 AM, Eric Dumazet <eduma...@google.com> wrote:
>> > Looking at fib->fib_metrics, I fail to understand how the following can 
>> > work :
>> >
>> > dst_init_metrics(&rt->dst, fi->fib_metrics, true);
>> >
>> > In the cases fi->fib_metrics is _not_ dst_default_metrics,
>> > fi->fib_metrics can be freed when the fib is deleted,
>> > while dst(s) have still the 'read only pointer'.
>> >
>> > RCU grace period before fi->fib_metrics freeing does not help.
>> >
>> > Without refcounts, it looks like we need to copy the fib_metrics.
>>
>> The dst is obtained from sk_dst_cache which is cached for a fast
>> path where fib_info is obtained in fib_lookup() without refcnt:
>>
>>                 err = fib_table_lookup(tb, flp, res, flags | 
>> FIB_LOOKUP_NOREF);
>>
>>
>> ...
>>                         if (!(fib_flags & FIB_LOOKUP_NOREF))
>>                                 atomic_inc(&fi->fib_clntref);
>>
>>
>> This probably starts from:
>>
>> commit ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9
>> Author: Eric Dumazet <eric.duma...@gmail.com>
>> Date:   Tue Oct 5 10:41:36 2010 +0000
>>
>>     fib: RCU conversion of fib_lookup()
>
> Interesting. I might had too many beers tonight, but ...
>
> refcount was removed in 2860583fe840 many months later

Good find! I missed the refcnt in rt_set_nexthop() before that commit.

We need to revert that commit to restore the refcnt for fib_info.
diff --git a/include/net/route.h b/include/net/route.h
index c0874c8..3917d0a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -69,6 +69,7 @@ struct rtable {
 
        struct list_head        rt_uncached;
        struct uncached_list    *rt_uncached_list;
+       struct fib_info         *fi; /* for refcnt to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8471dd1..514d7e9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1391,6 +1391,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
        struct rtable *rt = (struct rtable *) dst;
 
+       if (rt->fi) {
+               fib_info_put(rt->fi);
+               rt->fi = NULL;
+       }
+
        if (!list_empty(&rt->rt_uncached)) {
                struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1428,6 +1433,16 @@ static bool rt_cache_valid(const struct rtable *rt)
                !rt_is_expired(rt);
 }
 
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+       if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+               fib_info_hold(fi);
+               rt->fi = fi;
+       }
+
+       dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+}
+
 static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                           const struct fib_result *res,
                           struct fib_nh_exception *fnhe,
@@ -1442,7 +1457,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 
daddr,
                        rt->rt_gateway = nh->nh_gw;
                        rt->rt_uses_gateway = 1;
                }
-               dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+               rt_init_metrics(rt, fi);
 #ifdef CONFIG_IP_ROUTE_CLASSID
                rt->dst.tclassid = nh->nh_tclassid;
 #endif
@@ -1494,6 +1509,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
                rt->rt_gateway = 0;
                rt->rt_uses_gateway = 0;
                rt->rt_table_id = 0;
+               rt->fi = NULL;
                INIT_LIST_HEAD(&rt->rt_uncached);
 
                rt->dst.output = ip_output;

Reply via email to