On Fri, Apr 11, 2025 at 01:32:41PM +0200, Michal Luczaj wrote: > Test redirection logic. All supported and unsupported redirect combinations > are tested for success and failure respectively. > > BPF_MAP_TYPE_SOCKMAP > BPF_MAP_TYPE_SOCKHASH > x > sk_msg-to-egress > sk_msg-to-ingress > sk_skb-to-egress > sk_skb-to-ingress
Could we also add test cases for SK_PASS (and even SK_DROP)? Previously, we encountered deadlocks and incorrect sequence issues when the program returned SK_PASS, so explicit testing for these cases would be helpful. If implemented, this test would fully exercise all code paths and demonstrate a complete example that covers every aspect of sockmap's packet steering and connection management capabilities. > x > AF_INET, SOCK_STREAM > AF_INET6, SOCK_STREAM > AF_INET, SOCK_DGRAM > AF_INET6, SOCK_DGRAM > AF_UNIX, SOCK_STREAM > AF_UNIX, SOCK_DGRAM > AF_VSOCK, SOCK_STREAM > AF_VSOCK, SOCK_SEQPACKET > > Suggested-by: Jakub Sitnicki <ja...@cloudflare.com> > Signed-off-by: Michal Luczaj <m...@rbox.co> > --- > .../selftests/bpf/prog_tests/sockmap_redir.c | 461 > +++++++++++++++++++++ > 1 file changed, 461 insertions(+) > > diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c > b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..df550759c7e50d248322be3655b02b3a21267b4a > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c > @@ -0,0 +1,461 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Test for sockmap/sockhash redirection. 
> + * > + * BPF_MAP_TYPE_SOCKMAP > + * BPF_MAP_TYPE_SOCKHASH > + * x > + * sk_msg-to-egress > + * sk_msg-to-ingress > + * sk_skb-to-egress > + * sk_skb-to-ingress > + * x > + * AF_INET, SOCK_STREAM > + * AF_INET6, SOCK_STREAM > + * AF_INET, SOCK_DGRAM > + * AF_INET6, SOCK_DGRAM > + * AF_UNIX, SOCK_STREAM > + * AF_UNIX, SOCK_DGRAM > + * AF_VSOCK, SOCK_STREAM > + * AF_VSOCK, SOCK_SEQPACKET > + */ > + > +#include <errno.h> > +#include <error.h> > +#include <sched.h> > +#include <stdio.h> > +#include <unistd.h> > + > +#include <netinet/in.h> > +#include <sys/socket.h> > +#include <sys/types.h> > +#include <sys/un.h> > +#include <linux/string.h> > +#include <linux/vm_sockets.h> > + > +#include <bpf/bpf.h> > +#include <bpf/libbpf.h> > + > +#include "linux/const.h" > +#include "test_progs.h" > +#include "sockmap_helpers.h" > +#include "test_sockmap_listen.skel.h" > + > +/* The meaning of SUPPORTED is "will redirect packet as expected". > + */ > +#define SUPPORTED _BITUL(0) > + > +/* Note on sk_skb-to-ingress ->af_vsock: > + * > + * Peer socket may receive the packet some time after the return from > sendmsg(). > + * In a typical usage scenario, recvmsg() will block until the redirected > packet > + * appears in the destination queue, or time out if the packet was dropped. By > + * that point, the verdict map has already been updated to reflect what has > + * happened. > + * > + * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no > recvmsg() > + * takes place. Which means we may race the execution of the verdict logic > and > + * read map_verd before it has been updated, i.e. we might observe > + * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. > + * > + * This confuses the selftest logic: if there was no packet dropped, where's > the > + * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 > + * (which implies the verdict program has not been run) just re-read the > verdict > + * map.
> + */ > +#define UNSUPPORTED_RACY_VERD _BITUL(1) > + > +enum prog_type { > + SK_MSG_EGRESS, > + SK_MSG_INGRESS, > + SK_SKB_EGRESS, > + SK_SKB_INGRESS, > +}; > + > +enum { > + SEND_INNER = 0, > + SEND_OUTER, > +}; > + > +enum { > + RECV_INNER = 0, > + RECV_OUTER, > +}; > + > +struct maps { > + int in; > + int out; > + int verd; > +}; > + > +struct combo_spec { > + enum prog_type prog_type; > + const char *in, *out; > +}; > + > +struct redir_spec { > + const char *name; > + int idx_send; > + int idx_recv; > + enum prog_type prog_type; > +}; > + > +struct socket_spec { > + int family; > + int sotype; > + int send_flags; > + int in[2]; > + int out[2]; > +}; > + > +static int socket_spec_pairs(struct socket_spec *s) > +{ > + return create_socket_pairs(s->family, s->sotype, > + &s->in[0], &s->out[0], > + &s->in[1], &s->out[1]); > +} > + > +static void socket_spec_close(struct socket_spec *s) > +{ > + xclose(s->in[0]); > + xclose(s->in[1]); > + xclose(s->out[0]); > + xclose(s->out[1]); > +} > + > +static void get_redir_params(struct redir_spec *redir, > + struct test_sockmap_listen *skel, > + int *prog_fd, enum bpf_attach_type *attach_type, > + bool *ingress_flag) > +{ > + enum prog_type type = redir->prog_type; > + struct bpf_program *prog; > + bool sk_msg; > + > + sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; > + prog = sk_msg ? skel->progs.prog_msg_verdict : > skel->progs.prog_skb_verdict; > + > + *prog_fd = bpf_program__fd(prog); > + *attach_type = sk_msg ? 
BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; > + *ingress_flag = type == SK_MSG_INGRESS || type == SK_SKB_INGRESS; > +} > + > +static void try_recv(const char *prefix, int fd, int flags, bool > expect_success) > +{ > + ssize_t n; > + char buf; > + > + errno = 0; > + n = recv(fd, &buf, 1, flags); > + if (n < 0 && expect_success) > + FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); > + if (!n && !expect_success) > + FAIL("%s: expected failure: retval=%zd", prefix, n); > +} > + > +static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int > sd_out, > + int sd_recv, int map_verd, int status) > +{ > + unsigned int drop, pass; > + char recv_buf; > + ssize_t n; > + > +get_verdict: > + if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || > + xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) > + return; > + > + if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { > + sched_yield(); > + goto get_verdict; > + } > + > + if (pass != 0) { > + FAIL("unsupported: wanted verdict pass 0, have %u", pass); > + return; > + } > + > + /* If nothing was dropped, packet should have reached the peer */ > + if (drop == 0) { > + errno = 0; > + n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); > + if (n != 1) > + FAIL_ERRNO("unsupported: packet missing, retval=%zd", > n); > + } > + > + /* Ensure queues are empty */ > + try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); > + if (sd_in != sd_send) > + try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); > + > + try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); > + if (sd_recv != sd_out) > + try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); > +} > + > +static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, > + int sd_in, int sd_out, int sd_recv, > + struct maps *maps, int status) > +{ > + unsigned int drop, pass; > + char *send_buf = "ab"; > + char recv_buf = '\0'; > + ssize_t n, len = 1; > + > + /* Zero out the verdict map */ > + if 
(xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || > + xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) > + return; > + > + if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) > + return; > + > + if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) > + goto del_in; > + > + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ > + if (send_flags & MSG_OOB) > + len++; > + n = send(sd_send, send_buf, len, send_flags); > + if (n >= 0 && n < len) > + FAIL("incomplete send"); > + if (n < 0) { > + /* sk_msg redirect combo not supported? */ > + if (status & SUPPORTED || errno != EACCES) > + FAIL_ERRNO("send"); > + goto out; > + } > + > + if (!(status & SUPPORTED)) { > + handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, > + maps->verd, status); > + goto out; > + } > + > + errno = 0; > + n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); > + if (n != 1) { > + FAIL_ERRNO("recv_timeout()"); > + goto out; > + } > + > + /* Check verdict _after_ recv(); af_vsock may need time to catch up */ > + if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || > + xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) > + goto out; > + > + if (drop != 0 || pass != 1) > + FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", > + drop, pass); > + > + if (recv_buf != send_buf[0]) > + FAIL("recv(): payload check, %02x != %02x", recv_buf, > send_buf[0]); > + > + if (send_flags & MSG_OOB) { > + /* Fail reading OOB while in sockmap */ > + try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, > + MSG_OOB | MSG_DONTWAIT, false); > + > + /* Remove sd_out from sockmap */ > + xbpf_map_delete_elem(maps->out, &u32(0)); > + > + /* Check that OOB was dropped on redirect */ > + try_recv("recv(sd_out, MSG_OOB)", sd_out, > + MSG_OOB | MSG_DONTWAIT, false); > + > + goto del_in; > + } > +out: > + xbpf_map_delete_elem(maps->out, &u32(0)); > +del_in: > + xbpf_map_delete_elem(maps->in, &u32(0)); > +} > + 
> +static int is_redir_supported(enum prog_type type, const char *in, > + const char *out) > +{ > + /* Matching based on strings returned by socket_kind_to_str(): > + * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq > + * Plus a wildcard: any > + * Not in use: u_seq, v_dgr > + */ > + struct combo_spec *c, combos[] = { > + /* Send to local: TCP -> any, but vsock */ > + { SK_MSG_INGRESS, "tcp", "tcp" }, > + { SK_MSG_INGRESS, "tcp", "udp" }, > + { SK_MSG_INGRESS, "tcp", "u_str" }, > + { SK_MSG_INGRESS, "tcp", "u_dgr" }, > + > + /* Send to egress: TCP -> TCP */ > + { SK_MSG_EGRESS, "tcp", "tcp" }, > + > + /* Ingress to egress: any -> any */ > + { SK_SKB_EGRESS, "any", "any" }, > + > + /* Ingress to local: any -> any, but vsock */ > + { SK_SKB_INGRESS, "any", "tcp" }, > + { SK_SKB_INGRESS, "any", "udp" }, > + { SK_SKB_INGRESS, "any", "u_str" }, > + { SK_SKB_INGRESS, "any", "u_dgr" }, > + }; > + > + for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { > + if (c->prog_type == type && > + (!strcmp(c->in, "any") || strstarts(in, c->in)) && > + (!strcmp(c->out, "any") || strstarts(out, c->out))) > + return SUPPORTED; > + } > + > + return 0; > +} > + > +static int get_support_status(enum prog_type type, const char *in, > + const char *out) > +{ > + int status = is_redir_supported(type, in, out); > + > + if (type == SK_SKB_INGRESS && strstarts(out, "v_")) > + status |= UNSUPPORTED_RACY_VERD; > + > + return status; > +} > + > +static void test_socket(enum bpf_map_type type, struct redir_spec *redir, > + struct maps *maps, struct socket_spec *s_in, > + struct socket_spec *s_out) > +{ > + int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; > + const char *in_str, *out_str; > + char s[MAX_TEST_NAME]; > + > + fd_in = s_in->in[0]; > + fd_out = s_out->out[0]; > + fd_send = s_in->in[redir->idx_send]; > + fd_peer = s_in->in[redir->idx_send ^ 1]; > + fd_recv = s_out->out[redir->idx_recv]; > + flags = s_in->send_flags; > + > + in_str = socket_kind_to_str(fd_in); > + 
out_str = socket_kind_to_str(fd_out); > + status = get_support_status(redir->prog_type, in_str, out_str); > + > + snprintf(s, sizeof(s), > + "%-4s %-17s %-5s %s %-5s%6s", > + /* hash sk_skb-to-ingress u_str → v_str (OOB) */ > + type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", > + redir->name, > + in_str, > + status & SUPPORTED ? "→" : " ", > + out_str, > + (flags & MSG_OOB) ? "(OOB)" : ""); > + > + if (!test__start_subtest(s)) > + return; > + > + test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, > + maps, status); > +} > + > +static void test_redir(enum bpf_map_type type, struct redir_spec *redir, > + struct maps *maps) > +{ > + struct socket_spec *s, sockets[] = { > + { AF_INET, SOCK_STREAM }, > + // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ > + { AF_INET6, SOCK_STREAM }, > + { AF_INET, SOCK_DGRAM }, > + { AF_INET6, SOCK_DGRAM }, > + { AF_UNIX, SOCK_STREAM }, > + { AF_UNIX, SOCK_STREAM, MSG_OOB }, > + { AF_UNIX, SOCK_DGRAM }, > + // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported > BPF_MAP_UPDATE_ELEM */ > + { AF_VSOCK, SOCK_STREAM }, > + // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ > + { AF_VSOCK, SOCK_SEQPACKET }, > + }; > + > + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) > + if (socket_spec_pairs(s)) > + goto out; > + > + /* Intra-proto */ > + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) > + test_socket(type, redir, maps, s, s); > + > + /* Cross-proto */ > + for (int i = 0; i < ARRAY_SIZE(sockets); i++) { > + for (int j = 0; j < ARRAY_SIZE(sockets); j++) { > + struct socket_spec *out = &sockets[j]; > + struct socket_spec *in = &sockets[i]; > + > + /* Skip intra-proto and between variants */ > + if (out->send_flags || > + (in->family == out->family && > + in->sotype == out->sotype)) > + continue; > + > + test_socket(type, redir, maps, in, out); > + } > + } > +out: > + while (--s >= sockets) > + socket_spec_close(s); > +} > + > +static void test_map(enum bpf_map_type type) > +{ > + struct 
redir_spec *r, redirs[] = { > + { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, > + { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, > + { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, > + { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, > + }; > + > + for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { > + enum bpf_attach_type attach_type; > + struct test_sockmap_listen *skel; > + struct maps maps; > + int prog_fd; > + > + skel = test_sockmap_listen__open_and_load(); > + if (!skel) { > + FAIL("open_and_load"); > + return; > + } > + > + switch (type) { > + case BPF_MAP_TYPE_SOCKMAP: > + skel->bss->test_sockmap = true; > + maps.out = bpf_map__fd(skel->maps.sock_map); > + break; > + case BPF_MAP_TYPE_SOCKHASH: > + skel->bss->test_sockmap = false; > + maps.out = bpf_map__fd(skel->maps.sock_hash); > + break; > + default: > + FAIL("Unsupported bpf_map_type"); > + return; > + } > + > + maps.in = bpf_map__fd(skel->maps.nop_map); > + maps.verd = bpf_map__fd(skel->maps.verdict_map); > + get_redir_params(r, skel, &prog_fd, &attach_type, > + &skel->bss->test_ingress); > + > + if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) > + return; > + > + test_redir(type, r, &maps); > + > + if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) > + return; > + > + test_sockmap_listen__destroy(skel); > + } > +} > + > +void serial_test_sockmap_redir(void) > +{ > + test_map(BPF_MAP_TYPE_SOCKMAP); > + test_map(BPF_MAP_TYPE_SOCKHASH); > +} > > -- > 2.49.0 >