In this year's edition of GSoC, there is a project idea for CRIU to add support
for checkpoint/restore of cork-ed UDP sockets. But to add it, the kernel API 
needs
to be extended.
This is what this patch does. It adds a new command, called SIOUDPPENDGET, to 
the 
ioctl syscall regarding UDP sockets, which stores the pending data from the 
write
queue and the destination address in a struct msghdr. The arg for ioctl needs 
to 
be a pointer to a user space struct msghdr. The syscall returns the number of 
writed
bytes, if successful, or error. To retrive the data requires the CAP_NET_ADMIN
capability.

Signed-off-by: Lese Doru Calin <lesedorucali...@gmail.com>
---
 include/linux/socket.h       |   2 +
 include/uapi/linux/sockios.h |   3 +
 net/ipv4/udp.c               | 145 +++++++++++++++++++++++++++++++----
 net/socket.c                 |   4 +-
 4 files changed, 139 insertions(+), 15 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 54338fac45cb..632ba0ea6709 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -351,6 +351,8 @@ struct ucred {
 #define IPX_TYPE       1
 
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct 
sockaddr_storage *kaddr);
+extern int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
+                            void __user *uaddr, int __user *ulen);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
 struct timespec64;
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 7d1bccbbef78..3639fa906604 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -153,6 +153,9 @@
 #define SIOCSHWTSTAMP  0x89b0          /* set and get config           */
 #define SIOCGHWTSTAMP  0x89b1          /* get config                   */
 
+/* UDP socket calls*/
+#define SIOUDPPENDGET 0x89C0   /* get the pending data from write queue */
+
 /* Device private ioctl calls */
 
 /*
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32564b350823..f729a5e7f90b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1620,6 +1620,133 @@ static int first_packet_length(struct sock *sk)
        return res;
 }
 
+static void udp_set_source_addr(struct sock *sk, struct msghdr *msg,
+                               int *addr_len, u32 addr, u16 port)
+{
+       DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
+
+       if (sin) {
+               sin->sin_family = AF_INET;
+               sin->sin_port = port;
+               sin->sin_addr.s_addr = addr;
+               memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+               *addr_len = sizeof(*sin);
+
+               if (cgroup_bpf_enabled)
+                       BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+                                        (struct sockaddr *)sin);
+       }
+}
+
+static int udp_peek_sndq(struct sock *sk, struct msghdr *msg, int off, int len)
+{
+       int copy, copied = 0, err = 0;
+       struct sk_buff *skb;
+
+       skb_queue_walk(&sk->sk_write_queue, skb) {
+               copy = len - copied;
+               if (copy > skb->len - off)
+                       copy = skb->len - off;
+
+               err = skb_copy_datagram_msg(skb, off, msg, copy);
+               if (err)
+                       break;
+
+               copied += copy;
+               if (len <= copied)
+                       break;
+       }
+       return err ?: copied;
+}
+
+static int udp_get_pending_write_queue(struct sock *sk, struct msghdr *msg,
+                                      int *addr_len)
+{
+       int err = 0, off = sizeof(struct udphdr);
+       struct inet_sock *inet = inet_sk(sk);
+       struct udp_sock *up = udp_sk(sk);
+       struct flowi4 *fl4;
+       struct flowi6 *fl6;
+
+       switch (up->pending) {
+       case 0:
+               return -ENODATA;
+       case AF_INET:
+               off += sizeof(struct iphdr);
+               fl4 = &inet->cork.fl.u.ip4;
+               udp_set_source_addr(sk, msg, addr_len,
+                                   fl4->daddr, fl4->fl4_dport);
+               break;
+       case AF_INET6:
+               off += sizeof(struct ipv6hdr);
+               if (msg->msg_name) {
+                       DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6,
+                                        msg->msg_name);
+
+                       fl6 = &inet->cork.fl.u.ip6;
+                       sin6->sin6_family = AF_INET6;
+                       sin6->sin6_port = fl6->fl6_dport;
+                       sin6->sin6_flowinfo = 0;
+                       sin6->sin6_addr = fl6->daddr;
+                       sin6->sin6_scope_id = fl6->flowi6_oif;
+                       *addr_len = sizeof(*sin6);
+
+                       if (cgroup_bpf_enabled)
+                               BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+                                               (struct sockaddr *)sin6);
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       lock_sock(sk);
+       if (unlikely(!up->pending)) {
+               release_sock(sk);
+               return -EINVAL;
+       }
+       err = udp_peek_sndq(sk, msg, off, msg_data_left(msg));
+       release_sock(sk);
+       return err;
+}
+
+static int prep_msghdr_recv_pending(struct sock *sk, void __user *argp)
+{
+       struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+       struct user_msghdr __user *msg;
+       struct sockaddr __user *uaddr;
+       struct sockaddr_storage addr;
+       struct msghdr msg_sys;
+       int __user *uaddr_len;
+       int err = 0, len = 0;
+
+       if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (!argp)
+               return -EINVAL;
+
+       msg = (struct user_msghdr __user *)argp;
+       err = recvmsg_copy_msghdr(&msg_sys, msg, 0, &uaddr, &iov);
+       if (err < 0)
+               return err;
+
+       uaddr_len = &msg->msg_namelen;
+       msg_sys.msg_name = &addr;
+       msg_sys.msg_flags = 0;
+
+       err = udp_get_pending_write_queue(sk, &msg_sys, &len);
+       msg_sys.msg_namelen = len;
+       len = err;
+
+       if (uaddr && err >= 0)
+               err = move_addr_to_user(&addr, msg_sys.msg_namelen,
+                                       uaddr, uaddr_len);
+
+       kfree(iov);
+       return err < 0 ? err : len;
+}
+
 /*
  *     IOCTL requests applicable to the UDP protocol
  */
@@ -1641,6 +1768,9 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                return put_user(amount, (int __user *)arg);
        }
 
+       case SIOUDPPENDGET:
+               return prep_msghdr_recv_pending(sk, (void __user *)arg);
+
        default:
                return -ENOIOCTLCMD;
        }
@@ -1729,7 +1859,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, 
size_t len, int noblock,
                int flags, int *addr_len)
 {
        struct inet_sock *inet = inet_sk(sk);
-       DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
        struct sk_buff *skb;
        unsigned int ulen, copied;
        int off, err, peeking = flags & MSG_PEEK;
@@ -1794,18 +1923,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, 
size_t len, int noblock,
 
        sock_recv_ts_and_drops(msg, sk, skb);
 
-       /* Copy the address. */
-       if (sin) {
-               sin->sin_family = AF_INET;
-               sin->sin_port = udp_hdr(skb)->source;
-               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
-               memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-               *addr_len = sizeof(*sin);
-
-               if (cgroup_bpf_enabled)
-                       BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
-                                                       (struct sockaddr *)sin);
-       }
+       udp_set_source_addr(sk, msg, addr_len, ip_hdr(skb)->saddr,
+                           udp_hdr(skb)->source);
 
        if (udp_sk(sk)->gro_enabled)
                udp_cmsg_recv(msg, sk, skb);
diff --git a/net/socket.c b/net/socket.c
index 2dd739fba866..bd25d528c9a0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -217,8 +217,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, 
struct sockaddr_storage *k
  *     specified. Zero is returned for a success.
  */
 
-static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
-                            void __user *uaddr, int __user *ulen)
+int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
+                     void __user *uaddr, int __user *ulen)
 {
        int err;
        int len;
-- 
2.17.1

Reply via email to