On Mon, Sep 23, 2024 at 01:08 PM GMT, David Laight wrote:
> From: Tiago Lam <tiago...@cloudflare.com>

[...]

>> To limit its usage, a reverse socket lookup is performed to check if the
>> configured egress source address and/or port have any ingress sk_lookup
>> match. If it does, traffic is allowed to proceed, otherwise it falls
>> back to the regular egress path.
>
> Is that really useful/necessary?

We've been asking ourselves the same question during Plumbers with
Martin.

Unprivileges processes can already source UDP traffic from (almost) any
IP & port by binding a socket to the desired source port and passing
IP_PKTINFO. So perhaps having a reverse socket lookup is an overkill.

We should probably respect net.ipv4.ip_local_reserved_ports and
net.ipv4.ip_unprivileged_port_start system settings, though, or check
for relevant caps.

Open question if it is acceptable to disregard exclusive UDP port
ownership by sockets binding to a wildcard address without SO_REUSEADDR?

[...]





> The check (but not the commit message) implies that some 'bpf thingy'
> also needs to be enabled.
> Any check would need to include the test that the program sending the packet
> has the ability to send a packet through the ingress socket.
> Additionally a check for the sending process having (IIRC) CAP_NET_ADMIN
> (which would let the process send the message by other means) would save the
> slow path.
>
> The code we have sends a lot of UDP RTP (typically 160 bytes of audio every 
> 20ms).
> There is actually no reason for there to be a valid matching ingress path.
> (That code would benefit from being able to bind a lot of ports to the same
> UDP socket.)
>
>       David
>
>> 
>> Suggested-by: Jakub Sitnicki <ja...@cloudflare.com>
>> Signed-off-by: Tiago Lam <tiago...@cloudflare.com>
>> ---
>>  net/ipv6/datagram.c | 79 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  net/ipv6/udp.c      |  8 ++++--
>>  2 files changed, 85 insertions(+), 2 deletions(-)
>> 
>> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
>> index fff78496803d..369c64a478ec 100644
>> --- a/net/ipv6/datagram.c
>> +++ b/net/ipv6/datagram.c
>> @@ -756,6 +756,29 @@ void ip6_datagram_recv_ctl(struct sock *sk, struct 
>> msghdr *msg,
>>  }
>>  EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
>> 
>> +static inline bool reverse_sk_lookup(struct flowi6 *fl6, struct sock *sk,
>> +                                 struct in6_addr *saddr, __be16 sport)
>> +{
>> +    if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
>> +        (saddr && sport) &&
>> +        (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, saddr) ||
>> +        inet_sk(sk)->inet_sport != sport)) {
>> +            struct sock *sk_egress;
>> +
>> +            bpf_sk_lookup_run_v6(sock_net(sk), IPPROTO_UDP, &fl6->daddr,
>> +                                 fl6->fl6_dport, saddr, ntohs(sport), 0,
>> +                                 &sk_egress);
>> +            if (!IS_ERR_OR_NULL(sk_egress) && sk_egress == sk)
>> +                    return true;
>> +
>> +            net_info_ratelimited("No reverse socket lookup match for local 
>> addr %pI6:%d remote addr
>> %pI6:%d\n",
>> +                                 &saddr, ntohs(sport), &fl6->daddr,
>> +                                 ntohs(fl6->fl6_dport));
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>  int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
>>                        struct msghdr *msg, struct flowi6 *fl6,
>>                        struct ipcm6_cookie *ipc6)
>> @@ -844,7 +867,63 @@ int ip6_datagram_send_ctl(struct net *net, struct sock 
>> *sk,
>> 
>>                      break;
>>                  }
>> +            case IPV6_ORIGDSTADDR:
>> +                    {
>> +                    struct sockaddr_in6 *sockaddr_in;
>> +                    struct net_device *dev = NULL;
>> +
>> +                    if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct 
>> sockaddr_in6))) {
>> +                            err = -EINVAL;
>> +                            goto exit_f;
>> +                    }
>> +
>> +                    sockaddr_in = (struct sockaddr_in6 *)CMSG_DATA(cmsg);
>> +
>> +                    addr_type = __ipv6_addr_type(&sockaddr_in->sin6_addr);
>> +
>> +                    if (addr_type & IPV6_ADDR_LINKLOCAL)
>> +                            return -EINVAL;
>> +
>> +                    /* If we're egressing with a different source address
>> +                     * and/or port, we perform a reverse socket lookup. The
>> +                     * rationale behind this is that we can allow return
>> +                     * UDP traffic that has ingressed through sk_lookup to
>> +                     * also egress correctly. In case the reverse lookup
>> +                     * fails, we continue with the normal path.
>> +                     *
>> +                     * The lookup is performed if either source address
>> +                     * and/or port changed, and neither is "0".
>> +                     */
>> +                    if (reverse_sk_lookup(fl6, sk, &sockaddr_in->sin6_addr,
>> +                                          sockaddr_in->sin6_port)) {
>> +                            /* Override the source port and address to use
>> +                             * with the one we got in cmsg and bail early.
>> +                             */
>> +                            fl6->saddr = sockaddr_in->sin6_addr;
>> +                            fl6->fl6_sport = sockaddr_in->sin6_port;
>> +                            break;
>> +                    }
>> 
>> +                    if (addr_type != IPV6_ADDR_ANY) {
>> +                            int strict = __ipv6_addr_src_scope(addr_type) 
>> <= IPV6_ADDR_SCOPE_LINKLOCAL;
>> +
>> +                            if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) &&
>> +                                !ipv6_chk_addr_and_flags(net,
>> +                                                         
>> &sockaddr_in->sin6_addr,
>> +                                                         dev, !strict, 0,
>> +                                                         IFA_F_TENTATIVE) &&
>> +                                !ipv6_chk_acast_addr_src(net, dev,
>> +                                                         
>> &sockaddr_in->sin6_addr))
>> +                                    err = -EINVAL;
>> +                            else
>> +                                    fl6->saddr = sockaddr_in->sin6_addr;
>> +                    }
>> +
>> +                    if (err)
>> +                            goto exit_f;
>> +
>> +                    break;
>> +                    }
>>              case IPV6_FLOWINFO:
>>                      if (cmsg->cmsg_len < CMSG_LEN(4)) {
>>                              err = -EINVAL;
>> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
>> index 6602a2e9cdb5..6121cbb71ad3 100644
>> --- a/net/ipv6/udp.c
>> +++ b/net/ipv6/udp.c
>> @@ -1476,6 +1476,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr 
>> *msg, size_t len)
>> 
>>      fl6->flowi6_uid = sk->sk_uid;
>> 
>> +    /* We use fl6's daddr and fl6_sport in the reverse sk_lookup done
>> +     * within ip6_datagram_send_ctl() now.
>> +     */
>> +    fl6->daddr = *daddr;
>> +    fl6->fl6_sport = inet->inet_sport;
>> +
>>      if (msg->msg_controllen) {
>>              opt = &opt_space;
>>              memset(opt, 0, sizeof(struct ipv6_txoptions));
>> @@ -1511,10 +1517,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr 
>> *msg, size_t len)
>> 
>>      fl6->flowi6_proto = sk->sk_protocol;
>>      fl6->flowi6_mark = ipc6.sockc.mark;
>> -    fl6->daddr = *daddr;
>>      if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
>>              fl6->saddr = np->saddr;
>> -    fl6->fl6_sport = inet->inet_sport;
>> 
>>      if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
>>              err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
>> 
>> --
>> 2.34.1
>> 
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 
> 1PT, UK
> Registration No: 1397386 (Wales)

Reply via email to