Following the IPv4 stack changes, run a BPF program attached to the netns
before looking up a listening socket. The program can return a listening
socket to use as the result of the socket lookup, fail the lookup, or take
no action.

Suggested-by: Marek Majkowski <ma...@cloudflare.com>
Signed-off-by: Jakub Sitnicki <ja...@cloudflare.com>
---

Notes:
    v3:
    - Use a static_key to minimize the hook overhead when not used. (Alexei)
    - Don't copy struct in6_addr when populating BPF prog context. (Martin)
    - Adapt for running an array of attached programs. (Alexei)
    - Adapt for optionally skipping reuseport selection. (Martin)

 include/linux/filter.h      | 41 +++++++++++++++++++++++++++++++++++++
 net/ipv6/inet6_hashtables.c | 35 +++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ff7721d862c2..e7462f178213 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1336,4 +1336,45 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
        return do_reuseport;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+/* Run the SK_LOOKUP programs attached to @net; the verdict is stored in *psk:
+ * a socket, ERR_PTR(-ECONNREFUSED) or NULL. Returns whether to run reuseport. */
+static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
+                                       const struct in6_addr *saddr,
+                                       const __be16 sport,
+                                       const struct in6_addr *daddr,
+                                       const u16 dport,
+                                       struct sock **psk)
+{
+       struct bpf_prog_array *run_array;
+       bool do_reuseport = false;
+       struct sock *sk = NULL;
+
+       rcu_read_lock();
+       run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+       if (run_array) {
+               struct bpf_sk_lookup_kern ctx = {
+                       .family         = AF_INET6,
+                       .protocol       = protocol,
+                       .v6.saddr       = saddr,
+                       .v6.daddr       = daddr,
+                       .sport          = sport,
+                       .dport          = dport,
+               };
+               u32 ret = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, &ctx,
+                                                      BPF_PROG_RUN);
+
+               if (ret & (1U << BPF_REDIRECT)) {
+                       sk = ctx.selected_sk; /* stored by a program */
+                       do_reuseport = sk && !ctx.no_reuseport;
+               } else if (ret & (1U << BPF_DROP)) {
+                       sk = ERR_PTR(-ECONNREFUSED);
+               }
+       }
+       rcu_read_unlock();
+       *psk = sk;
+       return do_reuseport;
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 03942eef8ab6..b63583d2aa76 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,8 @@
 #include <net/ip.h>
 #include <net/sock_reuseport.h>
 
+extern struct inet_hashinfo tcp_hashinfo;
+
 u32 inet6_ehashfn(const struct net *net,
                  const struct in6_addr *laddr, const u16 lport,
                  const struct in6_addr *faddr, const __be16 fport)
@@ -159,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
        return result;
 }
 
+/* Consult the BPF sk_lookup hook before the listener hash tables. Returns the
+ * socket to use, an ERR_PTR() when the lookup must fail, or NULL to fall back.
+ */
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+                                               struct inet_hashinfo *hashinfo,
+                                               struct sk_buff *skb, int doff,
+                                               const struct in6_addr *saddr,
+                                               const __be16 sport,
+                                               const struct in6_addr *daddr,
+                                               const u16 hnum)
+{
+       struct sock *selected, *member;
+
+       if (hashinfo != &tcp_hashinfo)
+               return NULL; /* only TCP is supported */
+
+       if (!bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+                                 saddr, sport, daddr, hnum, &selected))
+               return selected;
+
+       member = lookup_reuseport(net, selected, skb, doff,
+                                 saddr, sport, daddr, hnum);
+       return member ? member : selected;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
                struct inet_hashinfo *hashinfo,
                struct sk_buff *skb, int doff,
@@ -170,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
        struct sock *result = NULL;
        unsigned int hash2;
 
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+                                             saddr, sport, daddr, hnum);
+               if (result)
+                       goto done;
+       }
+
        hash2 = ipv6_portaddr_hash(net, daddr, hnum);
        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
 
-- 
2.25.4

Reply via email to