Add --splice option to bench_sockmap that uses splice(2) instead of read(2) in the consumer path. A global pipe is created once during setup and reused across iterations to avoid per-call pipe creation overhead.
When --splice is enabled, the consumer splices data from the socket into the pipe, then reads from the pipe into the user buffer. The socket is set to O_NONBLOCK to prevent tcp_splice_read() from blocking indefinitely, as it only checks sock->file->f_flags for non-blocking mode, ignoring SPLICE_F_NONBLOCK.

Also increase SO_RCVBUF to 16MB to avoid sk_psock_backlog being throttled by the default sk_rcvbuf limit, and add --verify option to optionally enable data correctness checking (disabled by default for benchmark accuracy).

Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):

  read(2):                  ~4292 MB/s
  splice(2) + zero-copy:    ~4270 MB/s
  splice(2) + always-copy:  ~2770 MB/s

Zero-copy splice achieves near-parity with read(2), while the always-copy fallback is ~35% slower.

Usage:

  # Steer softirqs to CPU 7 to avoid contending with the producer CPU
  echo 80 > /sys/class/net/lo/queues/rx-0/rps_cpus
  # Raise the receive buffer ceiling so the benchmark can set 16MB rcvbuf
  sysctl -w net.core.rmem_max=16777216
  # Run the benchmark
  ./bench sockmap --rx-verdict-ingress --splice -c 2 -p 1 -a -d 30

Signed-off-by: Jiayuan Chen <[email protected]>
--- .../selftests/bpf/benchs/bench_sockmap.c | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c index cfc072aa7fff..ffcf5ad8cafa 100644 --- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c +++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c @@ -7,6 +7,9 @@ #include <sys/sendfile.h> #include <arpa/inet.h> #include <fcntl.h> +#include <unistd.h> +#include <sched.h> +#include <sys/syscall.h> #include <argp.h> #include "bench.h" #include "bench_sockmap_prog.skel.h" @@ -46,6 +49,8 @@ enum SOCKMAP_ARG_FLAG { ARG_CTL_RX_STRP, ARG_CONSUMER_DELAY_TIME, ARG_PRODUCER_DURATION, + ARG_CTL_SPLICE, + ARG_CTL_VERIFY, }; #define TXMODE_NORMAL() \ @@ -110,6 +115,9 @@ static struct socmap_ctx { int 
delay_consumer; int prod_run_time; int strp_size; + bool use_splice; + bool verify; + int pipefd[2]; } ctx = { .prod_send = 0, .user_read = 0, @@ -119,6 +127,9 @@ static struct socmap_ctx { .delay_consumer = 0, .prod_run_time = 0, .strp_size = 0, + .use_splice = false, + .verify = false, + .pipefd = {-1, -1}, }; static void bench_sockmap_prog_destroy(void) @@ -130,6 +141,11 @@ static void bench_sockmap_prog_destroy(void) close(ctx.fds[i]); } + if (ctx.pipefd[0] >= 0) + close(ctx.pipefd[0]); + if (ctx.pipefd[1] >= 0) + close(ctx.pipefd[1]); + bench_sockmap_prog__destroy(ctx.skel); } @@ -320,6 +336,7 @@ static int setup_tx_sockmap(void) static void setup(void) { + int rcvbuf = 16 * 1024 * 1024; int err; ctx.skel = bench_sockmap_prog__open_and_load(); @@ -350,6 +367,18 @@ static void setup(void) goto err; } + if (ctx.use_splice) { + if (pipe(ctx.pipefd)) { + fprintf(stderr, "pipe error:%d\n", errno); + goto err; + } + } + + setsockopt(ctx.c2, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)); + + if (ctx.use_splice) + set_non_block(ctx.c2, true); + return; err: @@ -368,6 +397,8 @@ static void measure(struct bench_res *res) static void verify_data(int *check_pos, char *buf, int rcv) { + if (!ctx.verify) + return; for (int i = 0 ; i < rcv; i++) { if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) { fprintf(stderr, "verify data fail"); @@ -388,6 +419,9 @@ static void *consumer(void *input) char *buf = malloc(recv_buf_size); int delay_read = ctx.delay_consumer; + printf("cons[%d] started, tid=%ld cpu=%d\n", + tid, syscall(SYS_gettid), sched_getcpu()); + if (!buf) { fprintf(stderr, "fail to init read buffer"); return NULL; @@ -419,7 +453,15 @@ static void *consumer(void *input) } /* read real endpoint by consumer 0 */ atomic_inc(&ctx.read_calls); - rcv = read(ctx.c2, buf, recv_buf_size); + if (ctx.use_splice) { + rcv = splice(ctx.c2, NULL, ctx.pipefd[1], + NULL, recv_buf_size, + SPLICE_F_NONBLOCK); + if (rcv > 0) + rcv = read(ctx.pipefd[0], buf, rcv); + } else { + rcv 
= read(ctx.c2, buf, recv_buf_size); + } if (rcv < 0 && errno != EAGAIN) { fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno); return NULL; @@ -440,6 +482,9 @@ static void *producer(void *input) int target; FILE *file; + printf("prod started, tid=%ld cpu=%d\n", + syscall(SYS_gettid), sched_getcpu()); + file = tmpfile(); if (!file) { fprintf(stderr, "create file for sendfile"); @@ -554,6 +599,10 @@ static const struct argp_option opts[] = { "delay consumer start"}, { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0, "producer duration"}, + { "splice", ARG_CTL_SPLICE, NULL, 0, + "use splice instead of read for consumer"}, + { "verify", ARG_CTL_VERIFY, NULL, 0, + "verify received data correctness"}, {}, }; @@ -572,6 +621,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_CTL_RX_STRP: ctx.strp_size = strtol(arg, NULL, 10); break; + case ARG_CTL_SPLICE: + ctx.use_splice = true; + break; + case ARG_CTL_VERIFY: + ctx.verify = true; + break; default: return ARGP_ERR_UNKNOWN; } -- 2.43.0

