On Mon, 12 Feb 2018 18:44:00 -0800 Tonghao Zhang <xiangxia.m....@gmail.com> wrote:
> Sometimes, we want to know how many tcp sockets are in use > different _net_ namespaces. It's a key resource metric. With > this patch, we can get it via /proc/net/sockstat. > > The 'alloc' show in /proc/net/sockstat is the total tcp > sockets in the kernel. This patch moves it to namespace, > via adding a counter because the previous counter is used > in proto(e.g tcp, udp and sctp) memory management. > > Signed-off-by: Tonghao Zhang <xiangxia.m....@gmail.com> > --- > include/net/netns/ipv4.h | 3 +++ > include/net/tcp.h | 2 ++ > net/ipv4/proc.c | 2 +- > net/ipv4/tcp.c | 2 ++ > net/ipv4/tcp_ipv4.c | 34 ++++++++++++++++++++++++++++++++++ > net/ipv4/tcp_minisocks.c | 3 +++ > 6 files changed, 45 insertions(+), 1 deletion(-) > > diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h > index 44668c2..85e91bd 100644 > --- a/include/net/netns/ipv4.h > +++ b/include/net/netns/ipv4.h > @@ -68,6 +68,9 @@ struct netns_ipv4 { > > struct inet_peer_base *peers; > struct sock * __percpu *tcp_sk; > +#ifdef CONFIG_PROC_FS > + int __percpu *tcp_sock_allocated; > +#endif > struct netns_frags frags; > #ifdef CONFIG_NETFILTER > struct xt_table *iptable_filter; > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 093e967..4b24b6e 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -1780,6 +1780,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, > int tcp_gro_complete(struct sk_buff *skb); > > void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); > +void tcp_sock_allocated_add(struct net *net, int val); > +int tcp_sock_allocated_get(struct net *net); > > static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) > { > diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c > index dc5edc8..8a147f7 100644 > --- a/net/ipv4/proc.c > +++ b/net/ipv4/proc.c > @@ -58,7 +58,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) > int orphans, sockets; > > orphans = percpu_counter_sum_positive(&tcp_orphan_count); > - sockets = 
proto_sockets_allocated_sum_positive(&tcp_prot); > + sockets = tcp_sock_allocated_get(net); > > socket_seq_show(seq); > seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index 874c931..77fe4a5 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -453,6 +453,8 @@ void tcp_init_sock(struct sock *sk) > sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; > > sk_sockets_allocated_inc(sk); > + if (likely(sk->sk_net_refcnt)) > + tcp_sock_allocated_add(sock_net(sk), 1); > } > EXPORT_SYMBOL(tcp_init_sock); > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 95738aa..a7bd0c4 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -1928,6 +1928,8 @@ void tcp_v4_destroy_sock(struct sock *sk) > tcp_saved_syn_free(tp); > > sk_sockets_allocated_dec(sk); > + if (likely(sk->sk_net_refcnt)) > + tcp_sock_allocated_add(sock_net(sk), -1); > } > EXPORT_SYMBOL(tcp_v4_destroy_sock); > > @@ -2446,6 +2448,28 @@ struct proto tcp_prot = { > }; > EXPORT_SYMBOL(tcp_prot); > > +void tcp_sock_allocated_add(struct net *net, int val) > +{ > +#ifdef CONFIG_PROC_FS > + this_cpu_add(*net->ipv4.tcp_sock_allocated, val); > +#endif > +} > +EXPORT_SYMBOL(tcp_sock_allocated_add); > + > +int tcp_sock_allocated_get(struct net *net) > +{ > +#ifdef CONFIG_PROC_FS > + int cpu, res = 0; > + > + for_each_possible_cpu(cpu) > + res += *per_cpu_ptr(net->ipv4.tcp_sock_allocated, cpu); > + return res; > +#else > + return 0; > +#endif > +} > +EXPORT_SYMBOL(tcp_sock_allocated_get); > + > static void __net_exit tcp_sk_exit(struct net *net) > { > int cpu; > @@ -2455,6 +2479,10 @@ static void __net_exit tcp_sk_exit(struct net *net) > for_each_possible_cpu(cpu) > inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); > free_percpu(net->ipv4.tcp_sk); > + > +#ifdef CONFIG_PROC_FS > + free_percpu(net->ipv4.tcp_sock_allocated); > +#endif > } > > static int __net_init tcp_sk_init(struct net *net) > @@ -2465,6 +2493,12 @@ 
static int __net_init tcp_sk_init(struct net *net) > if (!net->ipv4.tcp_sk) > return -ENOMEM; > > +#ifdef CONFIG_PROC_FS > + net->ipv4.tcp_sock_allocated = alloc_percpu(int); > + if (!net->ipv4.tcp_sock_allocated) > + goto fail; > +#endif > + > for_each_possible_cpu(cpu) { > struct sock *sk; > > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index a8384b0c..573fe43 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -559,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock > *sk, > newtp->rack.reo_wnd_persist = 0; > newtp->rack.dsack_seen = 0; > > + if (likely(newsk->sk_net_refcnt)) > + tcp_sock_allocated_add(sock_net(newsk), 1); > + > __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); > } > return newsk;

This makes sense to me only if you get rid of the old tcp_sockets_allocated and the proto field entry for .sockets_allocated. In that case you are replacing a global counter with a per-namespace counter and the impact should be minimal.