Hi Eric, On Sun, Oct 08, 2017 at 09:03:53AM -0700, Eric Dumazet wrote: > Thanks Ido for this patch. > > IMO, we no longer play this read_lock() -> write_lock() game since > ip6_dst_gc() could be called from rt6_make_pcpu_route()
Right, cause we can't deadlock anymore as with the rwlock. > > So we might simplify things quite a bit, by blocking BH (and thus > preventing preemption) > > Something like : > > net/ipv6/route.c | 26 ++++++-------------------- > 1 file changed, 6 insertions(+), 20 deletions(-) > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > index > 399d1bceec4a6e6736c367e706dd2acbd4093d58..606e80325b21c0e10a02e9c7d5b3fcfbfc26a003 > 100644 > --- a/net/ipv6/route.c > +++ b/net/ipv6/route.c > @@ -1136,15 +1136,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct > rt6_info *rt) > dst_hold(&pcpu_rt->dst); > p = this_cpu_ptr(rt->rt6i_pcpu); > prev = cmpxchg(p, NULL, pcpu_rt); > - if (prev) { > - /* If someone did it before us, return prev instead */ > - /* release refcnt taken by ip6_rt_pcpu_alloc() */ > - dst_release_immediate(&pcpu_rt->dst); > - /* release refcnt taken by above dst_hold() */ > - dst_release_immediate(&pcpu_rt->dst); > - dst_hold(&prev->dst); > - pcpu_rt = prev; > - } > + BUG_ON(prev); Is this BUG_ON() now valid because of the local_bh_disable() in ip6_pol_route()? > > rt6_dst_from_metrics_check(pcpu_rt); > return pcpu_rt; > @@ -1739,31 +1731,25 @@ struct rt6_info *ip6_pol_route(struct net *net, > struct fib6_table *table, > struct rt6_info *pcpu_rt; > > dst_use_noref(&rt->dst, jiffies); > + local_bh_disable(); > pcpu_rt = rt6_get_pcpu_route(rt); > > - if (pcpu_rt) { > - rcu_read_unlock(); > - } else { > + if (!pcpu_rt) { > /* atomic_inc_not_zero() is needed when using rcu */ > if (atomic_inc_not_zero(&rt->rt6i_ref)) { > - /* We have to do the read_unlock first > - * because rt6_make_pcpu_route() may trigger > - * ip6_dst_gc() which will take the write_lock. > - * > - * No dst_hold() on rt is needed because > grabbing > + /* No dst_hold() on rt is needed because > grabbing > * rt->rt6i_ref makes sure rt can't be released. > */ > - rcu_read_unlock(); > pcpu_rt = rt6_make_pcpu_route(rt); > rt6_release(rt); > } else { > /* rt is already removed from tree */ > - rcu_read_unlock(); > pcpu_rt = net->ipv6.ip6_null_entry; > dst_hold(&pcpu_rt->dst); > } > } > - > + local_bh_enable(); > + rcu_read_unlock(); > trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); > return pcpu_rt; > } I replaced my patch with yours and I don't trigger the bug anymore. Feel free to add my tag: Tested-by: Ido Schimmel <ido...@mellanox.com> Thanks!