This patch adds memory usage accounting to the IPv4 UDP send and receive paths.
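
The send path charges pages against sk->sk_prot->memory_allocated via
sk_wmem_schedule() before each allocation in ip_ufo_append_data(),
ip_append_data() and ip_append_page(), and fails with -ENOBUFS when the
charge is refused. The charge is dropped again when the datagram is
transmitted in ip_push_pending_frames() or discarded in
ip_flush_pending_frames(). The receive path charges each skb in
udp_queue_rcv_skb() via sk_datagram_rmem_schedule() and drops the charge
when the datagram is consumed in udp_recvmsg(), killed on a checksum
error, or purged at socket destruction in inet_sock_destruct().
Accounting is done in whole pages of the skb truesize, via
sk_datagram_pages() from an earlier patch in this series.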

Cc: Satoshi Oshima <[EMAIL PROTECTED]>
Signed-off-by: Hideo Aoki <[EMAIL PROTECTED]>
---

 af_inet.c   |   30 +++++++++++++++++++++++++++++-
 ip_output.c |   49 ++++++++++++++++++++++++++++++++++++++++++-------
 udp.c       |   16 ++++++++++++++++
 3 files changed, 87 insertions(+), 8 deletions(-)
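
Note for reviewers: sk_datagram_pages() and sk_datagram_rmem_schedule()
are introduced by an earlier patch in this series and therefore do not
appear in this diff. The sketch below shows the assumed semantics only;
see that patch for the real definitions:

static inline int sk_datagram_pages(int amt)
{
	/* Round a truesize in bytes up to whole pages; this is the
	 * unit charged to sk->sk_prot->memory_allocated.
	 */
	return DIV_ROUND_UP(amt, PAGE_SIZE);
}

static inline int sk_datagram_rmem_schedule(struct sock *sk,
					    struct sk_buff *skb)
{
	int pages = sk_datagram_pages(skb->truesize);

	/* Assumed behaviour: refuse the charge once the protocol's
	 * hard limit (sysctl_mem[2], counted in pages) is reached;
	 * return nonzero on success.
	 */
	if (atomic_read(sk->sk_prot->memory_allocated) + pages >
	    sk->sk_prot->sysctl_mem[2])
		return 0;
	atomic_add(pages, sk->sk_prot->memory_allocated);
	return 1;
}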

diff -pruN net-2.6-udp-take9a2-p3/net/ipv4/af_inet.c net-2.6-udp-take9a2-p4/net/ipv4/af_inet.c
--- net-2.6-udp-take9a2-p3/net/ipv4/af_inet.c   2007-11-28 12:11:02.000000000 -0500
+++ net-2.6-udp-take9a2-p4/net/ipv4/af_inet.c   2007-11-28 12:11:04.000000000 -0500
@@ -126,13 +126,41 @@ extern void ip_mc_drop_socket(struct soc
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);

+/**
+ *     __skb_queue_purge_and_sub_memory_allocated
+ *             - empty a list and subtract from the memory allocation counter
+ *     @sk:   socket owning the list
+ *     @list: list to empty
+ *     Delete all buffers on an &sk_buff list and subtract the
+ *     pages each sk_buff occupies from the memory accounting. Each buffer
+ *     is removed from the list and one reference dropped. This
+ *     function does not take the list lock and the caller must
+ *     hold the relevant locks to use it.
+ */
+static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock *sk,
+                                       struct sk_buff_head *list)
+{
+       struct sk_buff *skb;
+       int purged_pages = 0;
+       while ((skb = __skb_dequeue(list)) != NULL) {
+               purged_pages += sk_datagram_pages(skb->truesize);
+               kfree_skb(skb);
+       }
+       atomic_sub(purged_pages, sk->sk_prot->memory_allocated);
+}
+
 /* New destruction routine */

 void inet_sock_destruct(struct sock *sk)
 {
        struct inet_sock *inet = inet_sk(sk);

-       __skb_queue_purge(&sk->sk_receive_queue);
+       if (sk->sk_prot->memory_allocated && sk->sk_type != SOCK_STREAM)
+               __skb_queue_purge_and_sub_memory_allocated(sk,
+                               &sk->sk_receive_queue);
+       else
+               __skb_queue_purge(&sk->sk_receive_queue);
+
        __skb_queue_purge(&sk->sk_error_queue);

        if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
diff -pruN net-2.6-udp-take9a2-p3/net/ipv4/ip_output.c net-2.6-udp-take9a2-p4/net/ipv4/ip_output.c
--- net-2.6-udp-take9a2-p3/net/ipv4/ip_output.c 2007-11-27 11:11:37.000000000 -0500
+++ net-2.6-udp-take9a2-p4/net/ipv4/ip_output.c 2007-11-28 12:11:09.000000000 -0500
@@ -75,6 +75,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/udp.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_bridge.h>
@@ -707,16 +708,19 @@ static inline int ip_ufo_append_data(str
 {
        struct sk_buff *skb;
        int err;
+       int size = 0;

        /* There is support for UDP fragmentation offload by network
         * device, so create one single skb packet containing complete
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
-               skb = sock_alloc_send_skb(sk,
-                       hh_len + fragheaderlen + transhdrlen + 20,
-                       (flags & MSG_DONTWAIT), &err);
+               size = hh_len + fragheaderlen + transhdrlen + 20;
+               if (!sk_wmem_schedule(sk, size))
+                       return -ENOBUFS;

+               skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT),
+                                         &err);
                if (skb == NULL)
                        return err;

@@ -737,8 +741,12 @@ static inline int ip_ufo_append_data(str
                sk->sk_sndmsg_off = 0;
        }

-       err = skb_append_datato_frags(sk,skb, getfrag, from,
-                              (length - transhdrlen));
+       size = length - transhdrlen;
+       if (!sk_wmem_schedule(sk, size)) {
+               err = -ENOBUFS;
+               goto fail;
+       }
+       err = skb_append_datato_frags(sk, skb, getfrag, from, size);
        if (!err) {
                /* specify the length of each IP datagram fragment*/
                skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
@@ -750,6 +758,7 @@ static inline int ip_ufo_append_data(str
        /* There is not enough support do UFO ,
         * so follow normal path
         */
+fail:
        kfree_skb(skb);
        return err;
 }
@@ -908,6 +917,12 @@ alloc_new_skb:
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

+                       if (!sk_wmem_schedule(sk, alloclen + hh_len + 15 +
+                                             sizeof(struct sk_buff))) {
+                               err = -ENOBUFS;
+                               goto error;
+                       }
+
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len + 15,
@@ -1004,6 +1019,10 @@ alloc_new_skb:
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
+                               if (!sk_wmem_schedule(sk, PAGE_SIZE)) {
+                                       err = -ENOBUFS;
+                                       goto error;
+                               }
                                if (atomic_read(&sk->sk_wmem_alloc) + PAGE_SIZE
                                    > 2 * sk->sk_sndbuf) {
                                        err = -ENOBUFS;
@@ -1119,6 +1138,12 @@ ssize_t  ip_append_page(struct sock *sk,
                        fraggap = skb_prev->len - maxfraglen;

                        alloclen = fragheaderlen + hh_len + fraggap + 15;
+
+                       if (!sk_wmem_schedule(sk, alloclen +
+                                             sizeof(struct sk_buff))) {
+                               err = -ENOBUFS;
+                               goto error;
+                       }
                        skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
                        if (unlikely(!skb)) {
                                err = -ENOBUFS;
@@ -1213,13 +1238,14 @@ int ip_push_pending_frames(struct sock *
        struct iphdr *iph;
        __be16 df = 0;
        __u8 ttl;
-       int err = 0;
+       int err = 0, send_page_size;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

+       send_page_size = sk_datagram_pages(skb->truesize);
        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
@@ -1229,6 +1255,7 @@ int ip_push_pending_frames(struct sock *
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
+               send_page_size += sk_datagram_pages(tmp_skb->truesize);
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
@@ -1284,6 +1311,8 @@ int ip_push_pending_frames(struct sock *
        /* Netfilter gets whole the not fragmented skb. */
        err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
                      skb->dst->dev, dst_output);
+       if (sk->sk_prot->memory_allocated)
+               atomic_sub(send_page_size, sk->sk_prot->memory_allocated);
        if (err) {
                if (err > 0)
                        err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -1306,9 +1335,15 @@ error:
 void ip_flush_pending_frames(struct sock *sk)
 {
        struct sk_buff *skb;
+       int num_flush_mem = 0;

-       while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+       while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
+               num_flush_mem += sk_datagram_pages(skb->truesize);
                kfree_skb(skb);
+       }
+
+       if (sk->sk_prot->memory_allocated)
+               atomic_sub(num_flush_mem, sk->sk_prot->memory_allocated);

        ip_cork_release(inet_sk(sk));
 }
diff -pruN net-2.6-udp-take9a2-p3/net/ipv4/udp.c net-2.6-udp-take9a2-p4/net/ipv4/udp.c
--- net-2.6-udp-take9a2-p3/net/ipv4/udp.c       2007-11-28 12:11:02.000000000 -0500
+++ net-2.6-udp-take9a2-p4/net/ipv4/udp.c       2007-11-28 12:11:09.000000000 -0500
@@ -833,6 +833,7 @@ int udp_recvmsg(struct kiocb *iocb, stru
        unsigned int ulen, copied;
        int err;
        int is_udplite = IS_UDPLITE(sk);
+       int truesize;

        /*
         *      Check any passed addresses
@@ -897,14 +898,18 @@ try_again:
                err = ulen;

 out_free:
+       truesize = skb->truesize;
        skb_free_datagram(sk, skb);
+       atomic_sub(sk_datagram_pages(truesize), sk->sk_prot->memory_allocated);
 out:
        return err;

 csum_copy_err:
        UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);

+       truesize = skb->truesize;
        skb_kill_datagram(sk, skb, flags);
+       atomic_sub(sk_datagram_pages(truesize), sk->sk_prot->memory_allocated);

        if (noblock)
                return -EAGAIN;
@@ -946,6 +951,7 @@ int udp_queue_rcv_skb(struct sock * sk,
 {
        struct udp_sock *up = udp_sk(sk);
        int rc;
+       int scheduled = 0;

        /*
         *      Charge it to the socket, dropping if the queue is full.
@@ -1022,6 +1028,13 @@ int udp_queue_rcv_skb(struct sock * sk,
                        goto drop;
        }

+       if (sk_datagram_rmem_schedule(sk, skb)) {
+               scheduled = skb->truesize;
+       } else {
+               UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+               goto drop;
+       }
+
        if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
                /* Note that an ENOMEM error is charged twice */
                if (rc == -ENOMEM)
@@ -1035,6 +1048,9 @@ int udp_queue_rcv_skb(struct sock * sk,
 drop:
        UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
        kfree_skb(skb);
+       if (scheduled)
+               atomic_sub(sk_datagram_pages(scheduled),
+                          sk->sk_prot->memory_allocated);
        return -1;
 }

-- 
Hitachi Computer Products (America) Inc.