In this year's edition of GSoC, there is a project idea for CRIU to add support for checkpoint/restore of cork-ed UDP sockets. But to add it, the kernel API needs to be extended. This is what this patch does. It adds a new command, called SIOUDPPENDGET, to the ioctl syscall regarding UDP sockets, which stores the pending data from the write queue and the destination address in a struct msghdr. The arg for ioctl needs to be a pointer to a user space struct msghdr. The syscall returns the number of writed bytes, if successful, or error. To retrive the data requires the CAP_NET_ADMIN capability.
Signed-off-by: Lese Doru Calin <lesedorucali...@gmail.com> --- include/linux/socket.h | 2 + include/uapi/linux/sockios.h | 3 + net/ipv4/udp.c | 145 +++++++++++++++++++++++++++++++---- net/socket.c | 4 +- 4 files changed, 139 insertions(+), 15 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 54338fac45cb..632ba0ea6709 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -351,6 +351,8 @@ struct ucred { #define IPX_TYPE 1 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); +extern int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, + void __user *uaddr, int __user *ulen); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec64; diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 7d1bccbbef78..3639fa906604 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -153,6 +153,9 @@ #define SIOCSHWTSTAMP 0x89b0 /* set and get config */ #define SIOCGHWTSTAMP 0x89b1 /* get config */ +/* UDP socket calls*/ +#define SIOUDPPENDGET 0x89C0 /* get the pending data from write queue */ + /* Device private ioctl calls */ /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32564b350823..f729a5e7f90b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1620,6 +1620,133 @@ static int first_packet_length(struct sock *sk) return res; } +static void udp_set_source_addr(struct sock *sk, struct msghdr *msg, + int *addr_len, u32 addr, u16 port) +{ + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); + + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = port; + sin->sin_addr.s_addr = addr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); + + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, + (struct sockaddr *)sin); + } +} + +static int udp_peek_sndq(struct sock *sk, struct msghdr *msg, int off, int len) +{ + int copy, copied = 0, err = 0; + struct sk_buff *skb; + + skb_queue_walk(&sk->sk_write_queue, skb) { + copy = len - copied; + if (copy > skb->len - off) + copy = skb->len - off; + + err = skb_copy_datagram_msg(skb, off, msg, copy); + if (err) + break; + + copied += copy; + if (len <= copied) + break; + } + return err ?: copied; +} + +static int udp_get_pending_write_queue(struct sock *sk, struct msghdr *msg, + int *addr_len) +{ + int err = 0, off = sizeof(struct udphdr); + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); + struct flowi4 *fl4; + struct flowi6 *fl6; + + switch (up->pending) { + case 0: + return -ENODATA; + case AF_INET: + off += sizeof(struct iphdr); + fl4 = &inet->cork.fl.u.ip4; + udp_set_source_addr(sk, msg, addr_len, + fl4->daddr, fl4->fl4_dport); + break; + case AF_INET6: + off += sizeof(struct ipv6hdr); + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, + msg->msg_name); + + fl6 = &inet->cork.fl.u.ip6; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = fl6->fl6_dport; + sin6->sin6_flowinfo = 0; + sin6->sin6_addr = fl6->daddr; + sin6->sin6_scope_id = fl6->flowi6_oif; + *addr_len = sizeof(*sin6); + + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, + (struct sockaddr *)sin6); + } + break; + default: + return -EINVAL; + } + + lock_sock(sk); + if (unlikely(!up->pending)) { + release_sock(sk); + return -EINVAL; + } + err = udp_peek_sndq(sk, msg, off, msg_data_left(msg)); + release_sock(sk); + return err; +} + +static int prep_msghdr_recv_pending(struct sock *sk, void __user *argp) +{ + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + struct user_msghdr __user *msg; + struct sockaddr __user *uaddr; + struct sockaddr_storage addr; + struct msghdr msg_sys; + int __user *uaddr_len; + int err = 0, len = 0; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!argp) + return -EINVAL; + + msg = (struct user_msghdr __user *)argp; + err = recvmsg_copy_msghdr(&msg_sys, msg, 0, &uaddr, &iov); + if (err < 0) + return err; + + uaddr_len = &msg->msg_namelen; + msg_sys.msg_name = &addr; + msg_sys.msg_flags = 0; + + err = udp_get_pending_write_queue(sk, &msg_sys, &len); + msg_sys.msg_namelen = len; + len = err; + + if (uaddr && err >= 0) + err = move_addr_to_user(&addr, msg_sys.msg_namelen, + uaddr, uaddr_len); + + kfree(iov); + return err < 0 ? err : len; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -1641,6 +1768,9 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(amount, (int __user *)arg); } + case SIOUDPPENDGET: + return prep_msghdr_recv_pending(sk, (void __user *)arg); + default: return -ENOIOCTLCMD; } @@ -1729,7 +1859,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; @@ -1794,18 +1923,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, sock_recv_ts_and_drops(msg, sk, skb); - /* Copy the address. */ - if (sin) { - sin->sin_family = AF_INET; - sin->sin_port = udp_hdr(skb)->source; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); - - if (cgroup_bpf_enabled) - BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); - } + udp_set_source_addr(sk, msg, addr_len, ip_hdr(skb)->saddr, + udp_hdr(skb)->source); if (udp_sk(sk)->gro_enabled) udp_cmsg_recv(msg, sk, skb); diff --git a/net/socket.c b/net/socket.c index 2dd739fba866..bd25d528c9a0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -217,8 +217,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *k * specified. Zero is returned for a success. */ -static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, - void __user *uaddr, int __user *ulen) +int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, + void __user *uaddr, int __user *ulen) { int err; int len; -- 2.17.1