On 7/28/17 12:58 PM, Rolf Neugebauer wrote:
>>> I can readily reproduce this on 4.9.39, 4.11.12 and another user
>>> repro-ed it on 4.12.3. It seems to happen every time. At least one
>>> user reported issues with NFS mounts as well, but we were not able to
>>> reproduce it. It's not clear to me if this is directly related to
>>> 'mount.cifs' or if that just happens to reliably repro it.
>>
>> OK, so commit d747a7a51b00984127a88113c does not help this case
>> either.
> 
> d747a7a51b009("tcp: reset sk_rx_dst in tcp_disconnect()") indeed seems
> a different issue. As I understand that actually caused the ref count
> never to get decremented, while here eventually some cleanup kicks in
> after a long timeout.

It could be a dst is cached on a socket and does not get cleared until
the socket time outs are done.

Test that theory by something like this for IPv4 TCP (similar change for
UDP if the client is UDP based):

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a19ea28339f..37db087b6c97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1855,7 +1855,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const
struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);

-       if (dst && dst_hold_safe(dst)) {
+       if (0 && dst && dst_hold_safe(dst)) {
                sk->sk_rx_dst = dst;
                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
        }

> 
> I'll also try if I can get some traces out of dev_hold()/dev_put().


Attached patch puts tracepoints in dev_hold / dev_put; very useful for
debugging cases like this. Use perf record and perf script.
From 068b1b8362ec5fd1b9dffdbd6e84474ada2eb829 Mon Sep 17 00:00:00 2001
From: David Ahern <d...@cumulusnetworks.com>
Date: Thu, 11 Feb 2016 02:40:12 -0800
Subject: [PATCH] Add tracepoints to dev_hold and dev_put

Signed-off-by: David Ahern <d...@cumulusnetworks.com>
---
 include/linux/netdevice.h  |  6 ++++++
 include/trace/events/net.h | 38 ++++++++++++++++++++++++++++++++++++++
 net/core/dev.c             | 21 +++++++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 219f53c30cb3..7ef6fc672dfb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3193,6 +3193,7 @@ extern int                netdev_budget;
 /* Called by rtnetlink.c:rtnl_unlock() */
 void netdev_run_todo(void);
 
+#if 0
 /**
  *     dev_put - release reference to device
  *     @dev: network device
@@ -3214,6 +3215,11 @@ static inline void dev_hold(struct net_device *dev)
 {
        this_cpu_inc(*dev->pcpu_refcnt);
 }
+#else
+void dev_put(struct net_device *dev);
+void dev_hold(struct net_device *dev);
+
+#endif
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 49cc7c3de252..9ed73dfe9d09 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -236,6 +236,44 @@ DEFINE_EVENT(net_dev_rx_verbose_template, 
netif_rx_ni_entry,
        TP_ARGS(skb)
 );
 
+TRACE_EVENT(dev_put,
+
+       TP_PROTO(struct net_device *dev),
+
+       TP_ARGS(dev),
+
+       TP_STRUCT__entry(
+               __string(       name,           dev->name       )
+               __field(        int,            refcnt )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, dev->name);
+               __entry->refcnt = netdev_refcnt_read(dev);
+       ),
+
+       TP_printk("dev=%s refcnt %d", __get_str(name), __entry->refcnt)
+);
+
+TRACE_EVENT(dev_hold,
+
+       TP_PROTO(struct net_device *dev),
+
+       TP_ARGS(dev),
+
+       TP_STRUCT__entry(
+               __string(       name,           dev->name       )
+               __field(        int,            refcnt )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, dev->name);
+               __entry->refcnt = netdev_refcnt_read(dev);
+       ),
+
+       TP_printk("dev=%s refcnt %d", __get_str(name), __entry->refcnt)
+);
+
 #endif /* _TRACE_NET_H */
 
 /* This part must be outside protection */
diff --git a/net/core/dev.c b/net/core/dev.c
index f1284835b8c9..99ac067afd18 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8117,3 +8117,24 @@ static int __init net_dev_init(void)
 }
 
 subsys_initcall(net_dev_init);
+
+
+void dev_put(struct net_device *dev)
+{
+       trace_dev_put(dev);
+       this_cpu_dec(*dev->pcpu_refcnt);
+}
+EXPORT_SYMBOL(dev_put);
+
+/**
+ *      dev_hold - get reference to device
+ *      @dev: network device
+ *
+ * Hold reference to device to keep it from being freed.
+ */
+void dev_hold(struct net_device *dev)
+{
+       trace_dev_hold(dev);
+       this_cpu_inc(*dev->pcpu_refcnt);
+}
+EXPORT_SYMBOL(dev_hold);
-- 
2.1.4

Reply via email to