When run in server mode, the sample RDS program opens a PF_RDS socket and attaches an eBPF program to it. The eBPF program then uses the bpf_skb_next_frag helper, together with BPF tail calls, to inspect the skb's linear and non-linear data.
To ease testing, RDS client functionality is also added so that users can generate RDS packets.

Run server:
[root@lab71 bpf]# ./rds_skb -s 192.168.3.71
running server in a loop
transport tcp
server bound to address: 192.168.3.71 port 4000
server listening on 192.168.3.71
192.168.3.71 received a packet from 192.168.3.71 of len 8192 cmsg len 0, on port 52287
payload contains:30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f 60 61 62 63 64 65 66 67 68 69 6a 6b ...
server listening on 192.168.3.71

Run client:
[root@lab70 bpf]# ./rds_skb -s 192.168.3.71 -c 192.168.3.70
transport tcp
client bound to address: 192.168.3.71 port 47437
client sending 8192 byte message from 192.168.3.71 to 192.168.3.70 on port 47437

bpf program output:
[root@lab71]# cat /sys/kernel/debug/tracing/trace_pipe
<idle>-0 [000] ..s. 218923.839673: 0: 30 31 32
<idle>-0 [000] ..s. 218923.839682: 0: 33 34 35
<idle>-0 [000] ..s. 218923.845133: 0: be bf c0
<idle>-0 [000] ..s. 218923.845135: 0: c1 c2 c3
<idle>-0 [000] ..s. 218923.850581: 0: be bf c0
<idle>-0 [000] ..s. 218923.850582: 0: c1 c2 c3
<idle>-0 [000] ..s. 218923.850582: 0: no more skb frag

Note: changing the MTU to 9000 helps ensure that RDS gets skbs with fragments.
Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com> Reviewed-by: Shannon Nelson <shannon.nel...@oracle.com> Reviewed-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- samples/bpf/Makefile | 3 + samples/bpf/rds_skb_kern.c | 87 +++++++++++++ samples/bpf/rds_skb_user.c | 311 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 samples/bpf/rds_skb_kern.c create mode 100644 samples/bpf/rds_skb_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62a99ab..a05c3b2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -51,6 +51,7 @@ hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd +hostprogs-y += rds_skb # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +rds_skb-objs := bpf_load.o rds_skb_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -160,6 +162,7 @@ always += cpustat_kern.o always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o +always += rds_skb_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ diff --git a/samples/bpf/rds_skb_kern.c b/samples/bpf/rds_skb_kern.c new file mode 100644 index 0000000..c8832d4 --- /dev/null +++ b/samples/bpf/rds_skb_kern.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/filter.h> +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <linux/rds.h> +#include "bpf_helpers.h" + + +#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F + +#define bpf_printk(fmt, ...) 
\ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 2, +}; + +#define FRAG 1 + +static inline void dump_skb(struct __sk_buff *skb) +{ + void *data = (void *)(long) skb->data_meta; + void *data_end = (void *)(long) skb->data_end; + unsigned char *d; + + if (data + 6 > data_end) + return; + + d = (unsigned char *)data; + bpf_printk("%x %x %x\n", d[0], d[1], d[2]); + bpf_printk("%x %x %x\n", d[3], d[4], d[5]); + return; +} + +static void populate_skb_frags(struct __sk_buff *skb) +{ + int ret; + + ret = bpf_next_skb_frag(skb); + if (ret == -ENODATA) { + bpf_printk("no more skb frag\n"); + return; + } + + bpf_tail_call(skb, &jmp_table, 1); +} + +/* walk skb frag */ + +PROG(FRAG)(struct __sk_buff *skb) +{ + dump_skb(skb); + populate_skb_frags(skb); + return 0; +} + +SEC("socket/0") +int main_prog(struct __sk_buff *skb) +{ + void *data = (void *)(long) skb->data; + void *data_end = (void *)(long) skb->data_end; + int ret; + unsigned char *d; + + if (data + 6 > data_end) { + bpf_printk("out\n"); + return 0; + } + + d = (unsigned char *)data; + bpf_printk("%x %x %x\n", d[0], d[1], d[2]); + bpf_printk("%x %x %x\n", d[3], d[4], d[5]); + + populate_skb_frags(skb); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/rds_skb_user.c b/samples/bpf/rds_skb_user.c new file mode 100644 index 0000000..9f73dc3 --- /dev/null +++ b/samples/bpf/rds_skb_user.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <arpa/inet.h> +#include <assert.h> +#include "bpf_load.h" +#include <getopt.h> +#include <errno.h> +#include <netinet/in.h> +#include <limits.h> +#include <linux/sockios.h> +#include <linux/rds.h> +#include <linux/errqueue.h> +#include <linux/bpf.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include 
<string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> + +#define TESTPORT 4000 +#define BUFSIZE 8192 + +static const char *trans2str(int trans) +{ + switch (trans) { + case RDS_TRANS_TCP: + return ("tcp"); + case RDS_TRANS_NONE: + return ("none"); + default: + return ("unknown"); + } +} + +static int gettransport(int sock) +{ + int err; + char val; + socklen_t len = sizeof(int); + + err = getsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT, + (char *)&val, &len); + if (err < 0) { + fprintf(stderr, "%s: getsockopt %s\n", + __func__, strerror(errno)); + return err; + } + return (int)val; +} + +static int settransport(int sock, int transport) +{ + int err; + + err = setsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT, + (char *)&transport, sizeof(transport)); + if (err < 0) { + fprintf(stderr, "could not set transport %s, %s\n", + trans2str(transport), strerror(errno)); + } + return err; +} + +static void print_sock_local_info(int fd, char *str, struct sockaddr_in *ret) +{ + socklen_t sin_size = sizeof(struct sockaddr_in); + struct sockaddr_in sin; + int err; + + err = getsockname(fd, (struct sockaddr *)&sin, &sin_size); + if (err < 0) { + fprintf(stderr, "%s getsockname %s\n", + __func__, strerror(errno)); + return; + } + printf("%s address: %s port %d\n", + (str ? 
str : ""), inet_ntoa(sin.sin_addr), ntohs(sin.sin_port)); + + if (ret != NULL) + *ret = sin; +} + +static void server(char *address, in_port_t port) +{ + struct sockaddr_in sin, din; + struct msghdr msg; + struct iovec *iov; + int rc, sock; + char *buf; + + buf = calloc(BUFSIZE, sizeof(char)); + if (!buf) { + fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno)); + return; + } + + sock = socket(PF_RDS, SOCK_SEQPACKET, 0); + if (sock < 0) { + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno)); + goto out; + } + if (settransport(sock, RDS_TRANS_TCP) < 0) + goto out; + + printf("transport %s\n", trans2str(gettransport(sock))); + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = inet_addr(address); + sin.sin_port = htons(port); + + rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + if (rc < 0) { + fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno)); + goto out; + } + + /* attach eBPF program */ + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[1], + sizeof(prog_fd[0])) == 0); + + print_sock_local_info(sock, "server bound to", NULL); + + iov = calloc(1, sizeof(struct iovec)); + if (!iov) { + fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno)); + goto out; + } + + while (1) { + memset(buf, 0, BUFSIZE); + iov[0].iov_base = buf; + iov[0].iov_len = BUFSIZE; + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &din; + msg.msg_namelen = sizeof(din); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + printf("server listening on %s\n", inet_ntoa(sin.sin_addr)); + + rc = recvmsg(sock, &msg, 0); + if (rc < 0) { + fprintf(stderr, "%s: recvmsg %s\n", + __func__, strerror(errno)); + break; + } + + printf("%s received a packet from %s of len %d cmsg len %d, on port %d\n", + inet_ntoa(sin.sin_addr), + inet_ntoa(din.sin_addr), + (uint32_t) iov[0].iov_len, + (uint32_t) msg.msg_controllen, + ntohs(din.sin_port)); + + { + int i; + + printf("payload contains:"); + for (i = 0; i < 60; i++) + printf("%x ", 
buf[i]); + printf("...\n"); + } + } + free(iov); +out: + free(buf); +} + +static void create_message(char *buf) +{ + unsigned int i; + + for (i = 0; i < BUFSIZE; i++) { + buf[i] = i + 0x30; + } +} + +static int build_rds_packet(struct msghdr *msg, char *buf) +{ + struct iovec *iov; + + iov = calloc(1, sizeof(struct iovec)); + if (!iov) { + fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno)); + return -1; + } + + msg->msg_iov = iov; + msg->msg_iovlen = 1; + + iov[0].iov_base = buf; + iov[0].iov_len = BUFSIZE * sizeof(char); + + return 0; +} + +static void client(char *localaddr, char *remoteaddr, in_port_t server_port) +{ + struct sockaddr_in sin, din; + struct msghdr msg; + int rc, sock; + char *buf; + + buf = calloc(BUFSIZE, sizeof(char)); + if (!buf) { + fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno)); + return; + } + + create_message(buf); + + sock = socket(PF_RDS, SOCK_SEQPACKET, 0); + if (sock < 0) { + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno)); + goto out; + } + + if (settransport(sock, RDS_TRANS_TCP) < 0) + goto out; + + printf("transport %s\n", trans2str(gettransport(sock))); + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = inet_addr(localaddr); + sin.sin_port = 0; + + rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + if (rc < 0) { + fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno)); + goto out; + } + print_sock_local_info(sock, "client bound to", &sin); + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &din; + msg.msg_namelen = sizeof(din); + + memset(&din, 0, sizeof(din)); + din.sin_family = AF_INET; + din.sin_addr.s_addr = inet_addr(remoteaddr); + din.sin_port = htons(server_port); + + rc = build_rds_packet(&msg, buf); + if (rc < 0) + goto out; + + printf("client sending %d byte message from %s to %s on port %d\n", + (uint32_t) msg.msg_iov->iov_len, localaddr, + remoteaddr, ntohs(sin.sin_port)); + + rc = sendmsg(sock, &msg, 0); + if (rc < 0) + 
fprintf(stderr, "%s: sendmsg %s\n", __func__, strerror(errno)); + + if (msg.msg_control) + free(msg.msg_control); + if (msg.msg_iov) + free(msg.msg_iov); +out: + free(buf); + + return; +} + +static void usage(char *progname) +{ + fprintf(stderr, "Usage %s [-s srvaddr] [-c clientaddr]\n", progname); +} + +int main(int argc, char **argv) +{ + in_port_t server_port = TESTPORT; + char *serveraddr = NULL; + char *clientaddr = NULL; + char filename[256]; + int opt; + + while ((opt = getopt(argc, argv, "s:c:")) != -1) { + switch (opt) { + case 's': + serveraddr = optarg; + break; + case 'c': + clientaddr = optarg; + break; + default: + usage(argv[0]); + return 1; + } + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + fprintf(stderr, "Error: load_bpf_file %s", bpf_log_buf); + return 1; + } + + if (serveraddr && !clientaddr) { + printf("running server in a loop\n"); + server(serveraddr, server_port); + } else if (serveraddr && clientaddr) { + client(clientaddr, serveraddr, server_port); + } + + return 0; +} -- 1.8.3.1