Factor out basic sock operations that the UDP code can use with its
own memory accounting scheme. No functional change is introduced
in the existing APIs.

Acked-by: Hannes Frederic Sowa <han...@stressinduktion.org>
Signed-off-by: Paolo Abeni <pab...@redhat.com>
---
 include/linux/skbuff.h |  2 +-
 include/net/sock.h     |  5 +++
 net/core/datagram.c    | 36 +++++++++++--------
 net/core/skbuff.c      |  3 +-
 net/core/sock.c        | 96 +++++++++++++++++++++++++++++++++-----------------
 5 files changed, 94 insertions(+), 48 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cfb7219..49c489d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3016,7 +3016,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 #define skb_walk_frags(skb, iter)      \
        for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
 
-
+void sock_rmem_free(struct sk_buff *skb);
 int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
                                const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
diff --git a/include/net/sock.h b/include/net/sock.h
index c797c57..a37362c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1274,7 +1274,9 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
 /*
  * Functions for memory accounting
  */
+int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind);
 int __sk_mem_schedule(struct sock *sk, int size, int kind);
+void __sk_mem_reduce_allocated(struct sock *sk, int amount);
 void __sk_mem_reclaim(struct sock *sk, int amount);
 
 #define SK_MEM_QUANTUM ((int)PAGE_SIZE)
@@ -1940,6 +1942,9 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
+                       unsigned int flags);
+void __sock_enqueue_skb(struct sock *sk, struct sk_buff *skb);
 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7de71f..bfb973a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -323,6 +323,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 }
 EXPORT_SYMBOL(__skb_free_datagram_locked);
 
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
+                       unsigned int flags)
+{
+       int err = 0;
+
+       if (flags & MSG_PEEK) {
+               err = -ENOENT;
+               spin_lock_bh(&sk->sk_receive_queue.lock);
+               if (skb == skb_peek(&sk->sk_receive_queue)) {
+                       __skb_unlink(skb, &sk->sk_receive_queue);
+                       atomic_dec(&skb->users);
+                       err = 0;
+               }
+               spin_unlock_bh(&sk->sk_receive_queue.lock);
+       }
+
+       atomic_inc(&sk->sk_drops);
+       return err;
+}
+EXPORT_SYMBOL(__sk_queue_drop_skb);
+
 /**
  *     skb_kill_datagram - Free a datagram skbuff forcibly
  *     @sk: socket
@@ -346,23 +367,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked);
 
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
-       int err = 0;
-
-       if (flags & MSG_PEEK) {
-               err = -ENOENT;
-               spin_lock_bh(&sk->sk_receive_queue.lock);
-               if (skb == skb_peek(&sk->sk_receive_queue)) {
-                       __skb_unlink(skb, &sk->sk_receive_queue);
-                       atomic_dec(&skb->users);
-                       err = 0;
-               }
-               spin_unlock_bh(&sk->sk_receive_queue.lock);
-       }
+       int err = __sk_queue_drop_skb(sk, skb, flags);
 
        kfree_skb(skb);
-       atomic_inc(&sk->sk_drops);
        sk_mem_reclaim_partial(sk);
-
        return err;
 }
 EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3864b4b6..4dce605 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3657,12 +3657,13 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
-static void sock_rmem_free(struct sk_buff *skb)
+void sock_rmem_free(struct sk_buff *skb)
 {
        struct sock *sk = skb->sk;
 
        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 }
+EXPORT_SYMBOL_GPL(sock_rmem_free);
 
 /*
  * Note: We dont mem charge error packets (no sk_forward_alloc changes)
diff --git a/net/core/sock.c b/net/core/sock.c
index 51a7304..752308d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -405,24 +405,12 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 }
 
 
-int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+void __sock_enqueue_skb(struct sock *sk, struct sk_buff *skb)
 {
        unsigned long flags;
        struct sk_buff_head *list = &sk->sk_receive_queue;
 
-       if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
-               atomic_inc(&sk->sk_drops);
-               trace_sock_rcvqueue_full(sk, skb);
-               return -ENOMEM;
-       }
-
-       if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-               atomic_inc(&sk->sk_drops);
-               return -ENOBUFS;
-       }
-
        skb->dev = NULL;
-       skb_set_owner_r(skb, sk);
 
        /* we escape from rcu protected region, make sure we dont leak
         * a norefcounted dst
@@ -436,6 +424,24 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk);
+}
+EXPORT_SYMBOL(__sock_enqueue_skb);
+
+int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+       if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+               atomic_inc(&sk->sk_drops);
+               trace_sock_rcvqueue_full(sk, skb);
+               return -ENOMEM;
+       }
+
+       if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
+               atomic_inc(&sk->sk_drops);
+               return -ENOBUFS;
+       }
+
+       skb_set_owner_r(skb, sk);
+       __sock_enqueue_skb(sk, skb);
        return 0;
 }
 EXPORT_SYMBOL(__sock_queue_rcv_skb);
@@ -2088,24 +2094,18 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
 EXPORT_SYMBOL(sk_wait_data);
 
 /**
- *     __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ *     __sk_mem_raise_allocated - increase memory_allocated
  *     @sk: socket
  *     @size: memory size to allocate
+ *     @amt: pages to allocate
  *     @kind: allocation type
  *
- *     If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
- *     rmem allocation. This function assumes that protocols which have
- *     memory_pressure use sk_wmem_queued as write buffer accounting.
+ *     Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
  */
-int __sk_mem_schedule(struct sock *sk, int size, int kind)
+int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
        struct proto *prot = sk->sk_prot;
-       int amt = sk_mem_pages(size);
-       long allocated;
-
-       sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-
-       allocated = sk_memory_allocated_add(sk, amt);
+       long allocated = sk_memory_allocated_add(sk, amt);
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
            !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
@@ -2166,9 +2166,6 @@ suppress_allocation:
 
        trace_sock_exceed_buf_limit(sk, prot, allocated);
 
-       /* Alas. Undo changes. */
-       sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-
        sk_memory_allocated_sub(sk, amt);
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -2176,18 +2173,40 @@ suppress_allocation:
 
        return 0;
 }
+EXPORT_SYMBOL(__sk_mem_raise_allocated);
+
+/**
+ *     __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ *     @sk: socket
+ *     @size: memory size to allocate
+ *     @kind: allocation type
+ *
+ *     If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ *     rmem allocation. This function assumes that protocols which have
+ *     memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+       int ret, amt = sk_mem_pages(size);
+
+       sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
+       ret = __sk_mem_raise_allocated(sk, size, amt, kind);
+       if (!ret)
+               sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
+       return ret;
+}
 EXPORT_SYMBOL(__sk_mem_schedule);
 
 /**
- *     __sk_mem_reclaim - reclaim memory_allocated
+ *     __sk_mem_reduce_allocated - reclaim memory_allocated
  *     @sk: socket
- *     @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ *     @amount: number of quanta
+ *
+ *     Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
  */
-void __sk_mem_reclaim(struct sock *sk, int amount)
+void __sk_mem_reduce_allocated(struct sock *sk, int amount)
 {
-       amount >>= SK_MEM_QUANTUM_SHIFT;
        sk_memory_allocated_sub(sk, amount);
-       sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
@@ -2196,6 +2215,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
            (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
                sk_leave_memory_pressure(sk);
 }
+EXPORT_SYMBOL(__sk_mem_reduce_allocated);
+
+/**
+ *     __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
+ *     @sk: socket
+ *     @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ */
+void __sk_mem_reclaim(struct sock *sk, int amount)
+{
+       amount >>= SK_MEM_QUANTUM_SHIFT;
+       sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+       __sk_mem_reduce_allocated(sk, amount);
+}
 EXPORT_SYMBOL(__sk_mem_reclaim);
 
 int sk_set_peek_off(struct sock *sk, int val)
-- 
1.8.3.1

Reply via email to