In order to check if egress traffic should be allowed through, we run a
reverse socket lookup (i.e. normal socket lookup with the src/dst
addresses and ports reversed) to check if the corresponding ingress
traffic is allowed in.  Thus, if there's a sk_lookup reverse call
returns a socket that matches the egress socket, we also let the egress
traffic through - following the principle of, allowing return traffic to
proceed if ingress traffic is allowed in.  The reverse lookup is only
performed in case an sk_lookup ebpf program is attached and the source
address and/or port for the return traffic have been modified.

The src address and port can be modified by using ancilliary messages.
Up until now, it was possible to specify a different source address to
sendmsg by providing it in an IP_PKTINFO anciliarry message, but there's
no way to change the source port. This patch also extends the ancilliary
messages supported by sendmsg to support the IP_ORIGDSTADDR ancilliary
message, reusing the same cmsg and struct used in recvmsg - which
already supports specifying a port.

Suggested-by: Jakub Sitnicki <ja...@cloudflare.com>
Signed-off-by: Tiago Lam <tiago...@cloudflare.com>
---
 include/net/ip.h       |  1 +
 net/ipv4/ip_sockglue.c | 11 +++++++++++
 net/ipv4/udp.c         | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index c5606cadb1a5..e5753abd7247 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -75,6 +75,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff 
*skb)
 struct ipcm_cookie {
        struct sockcm_cookie    sockc;
        __be32                  addr;
+       __be16                  port;
        int                     oif;
        struct ip_options_rcu   *opt;
        __u8                    protocol;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cf377377b52d..6e55bd25b5f7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -297,6 +297,17 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, 
struct ipcm_cookie *ipc,
                        ipc->addr = info->ipi_spec_dst.s_addr;
                        break;
                }
+               case IP_ORIGDSTADDR:
+               {
+                       struct sockaddr_in *dst_addr;
+
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct 
sockaddr_in)))
+                               return -EINVAL;
+                       dst_addr = (struct sockaddr_in *)CMSG_DATA(cmsg);
+                       ipc->port = dst_addr->sin_port;
+                       ipc->addr = dst_addr->sin_addr.s_addr;
+                       break;
+               }
                case IP_TTL:
                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
                                return -EINVAL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 49c622e743e8..b9dc0a88b0c6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1060,6 +1060,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
        DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
        struct flowi4 fl4_stack;
        struct flowi4 *fl4;
+       __u8 flow_flags = inet_sk_flowi_flags(sk);
        int ulen = len;
        struct ipcm_cookie ipc;
        struct rtable *rt = NULL;
@@ -1179,6 +1180,37 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
                }
        }
 
+       /* If we're egressing with a different source address and/or port, we
+        * perform a reverse socket lookup.  The rationale behind this is that 
we
+        * can allow return UDP traffic that has ingressed through sk_lookup to
+        * also egress correctly. In case this the reverse lookup fails.
+        *
+        * The lookup is performed if either source address and/or port 
changed, and
+        * neither is "0".
+        */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+           !connected &&
+           (ipc.port && ipc.addr) &&
+           (inet->inet_saddr != ipc.addr || inet->inet_sport != ipc.port)) {
+               struct sock *sk_egress;
+
+               bpf_sk_lookup_run_v4(sock_net(sk), IPPROTO_UDP,
+                                    daddr, dport, ipc.addr, ntohs(ipc.port), 
1, &sk_egress);
+               if (IS_ERR_OR_NULL(sk_egress) ||
+                   atomic64_read(&sk_egress->sk_cookie) != 
atomic64_read(&sk->sk_cookie)) {
+                       net_info_ratelimited("No reverse socket lookup match 
for local addr %pI4:%d remote addr %pI4:%d\n",
+                                            &ipc.addr, ntohs(ipc.port), 
&daddr, ntohs(dport));
+               } else {
+                       /* Override the source port to use with the one we got 
in cmsg,
+                        * and tell routing to let us use a non-local address. 
Otherwise
+                        * route lookups will fail with non-local source 
address when
+                        * IP_TRANSPARENT isn't set.
+                        */
+                       inet->inet_sport = ipc.port;
+                       flow_flags |= FLOWI_FLAG_ANYSRC;
+               }
+       }
+
        saddr = ipc.addr;
        ipc.addr = faddr = daddr;
 
@@ -1223,7 +1255,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
 
        if (!rt) {
                struct net *net = sock_net(sk);
-               __u8 flow_flags = inet_sk_flowi_flags(sk);
 
                fl4 = &fl4_stack;
 

-- 
2.34.1


Reply via email to