Add --splice option to bench_sockmap that uses splice(2) instead of
read(2) in the consumer path. A global pipe is created once during
setup and reused across iterations to avoid per-call pipe creation
overhead.

When --splice is enabled, the consumer splices data from the socket
into the pipe, then reads from the pipe into the user buffer. The
socket is set to O_NONBLOCK to prevent tcp_splice_read() from
blocking indefinitely, as it only checks sock->file->f_flags for
non-blocking mode, ignoring SPLICE_F_NONBLOCK.

Also increase SO_RCVBUF to 16MB to avoid sk_psock_backlog being
throttled by the default sk_rcvbuf limit, and add --verify option
to optionally enable data correctness checking (disabled by default
for benchmark accuracy).

Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):

  read(2):                  ~4292 MB/s
  splice(2) + zero-copy:    ~4270 MB/s
  splice(2) + always-copy:  ~2770 MB/s

Zero-copy splice achieves near-parity with read(2), while the
always-copy fallback is ~35% slower.

Usage:
  # Steer softirqs to CPU 7 to avoid contending with the producer CPU
  # (rps_cpus takes a hexadecimal CPU bitmask: 80 = bit 7 = CPU 7)
  echo 80 > /sys/class/net/lo/queues/rx-0/rps_cpus
  # Raise the receive buffer ceiling so the benchmark can set 16MB rcvbuf
  sysctl -w net.core.rmem_max=16777216
  # Run the benchmark
  ./bench sockmap --rx-verdict-ingress --splice -c 2 -p 1 -a -d 30

Signed-off-by: Jiayuan Chen <[email protected]>
---
 .../selftests/bpf/benchs/bench_sockmap.c      | 57 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index cfc072aa7fff..ffcf5ad8cafa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -7,6 +7,9 @@
 #include <sys/sendfile.h>
 #include <arpa/inet.h>
 #include <fcntl.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/syscall.h>
 #include <argp.h>
 #include "bench.h"
 #include "bench_sockmap_prog.skel.h"
@@ -46,6 +49,8 @@ enum SOCKMAP_ARG_FLAG {
        ARG_CTL_RX_STRP,
        ARG_CONSUMER_DELAY_TIME,
        ARG_PRODUCER_DURATION,
+       ARG_CTL_SPLICE,
+       ARG_CTL_VERIFY,
 };
 
 #define TXMODE_NORMAL()                                \
@@ -110,6 +115,9 @@ static struct socmap_ctx {
        int             delay_consumer;
        int             prod_run_time;
        int             strp_size;
+       bool            use_splice;
+       bool            verify;
+       int             pipefd[2];
 } ctx = {
        .prod_send      = 0,
        .user_read      = 0,
@@ -119,6 +127,9 @@ static struct socmap_ctx {
        .delay_consumer = 0,
        .prod_run_time  = 0,
        .strp_size      = 0,
+       .use_splice     = false,
+       .verify         = false,
+       .pipefd         = {-1, -1},
 };
 
 static void bench_sockmap_prog_destroy(void)
@@ -130,6 +141,11 @@ static void bench_sockmap_prog_destroy(void)
                        close(ctx.fds[i]);
        }
 
+       if (ctx.pipefd[0] >= 0)
+               close(ctx.pipefd[0]);
+       if (ctx.pipefd[1] >= 0)
+               close(ctx.pipefd[1]);
+
        bench_sockmap_prog__destroy(ctx.skel);
 }
 
@@ -320,6 +336,7 @@ static int setup_tx_sockmap(void)
 
 static void setup(void)
 {
+       int rcvbuf = 16 * 1024 * 1024;
        int err;
 
        ctx.skel = bench_sockmap_prog__open_and_load();
@@ -350,6 +367,18 @@ static void setup(void)
                goto err;
        }
 
+       if (ctx.use_splice) {
+               if (pipe(ctx.pipefd)) {
+                       fprintf(stderr, "pipe error:%d\n", errno);
+                       goto err;
+               }
+       }
+
+       setsockopt(ctx.c2, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+
+       if (ctx.use_splice)
+               set_non_block(ctx.c2, true);
+
        return;
 
 err:
@@ -368,6 +397,8 @@ static void measure(struct bench_res *res)
 
 static void verify_data(int *check_pos, char *buf, int rcv)
 {
+       if (!ctx.verify)
+               return;
        for (int i = 0 ; i < rcv; i++) {
                if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
                        fprintf(stderr, "verify data fail");
@@ -388,6 +419,9 @@ static void *consumer(void *input)
        char *buf = malloc(recv_buf_size);
        int delay_read = ctx.delay_consumer;
 
+       printf("cons[%d] started, tid=%ld cpu=%d\n",
+              tid, syscall(SYS_gettid), sched_getcpu());
+
        if (!buf) {
                fprintf(stderr, "fail to init read buffer");
                return NULL;
@@ -419,7 +453,15 @@ static void *consumer(void *input)
                        }
                        /* read real endpoint by consumer 0 */
                        atomic_inc(&ctx.read_calls);
-                       rcv = read(ctx.c2, buf, recv_buf_size);
+                       if (ctx.use_splice) {
+                               rcv = splice(ctx.c2, NULL, ctx.pipefd[1],
+                                            NULL, recv_buf_size,
+                                            SPLICE_F_NONBLOCK);
+                               if (rcv > 0)
+                                       rcv = read(ctx.pipefd[0], buf, rcv);
+                       } else {
+                               rcv = read(ctx.c2, buf, recv_buf_size);
+                       }
                        if (rcv < 0 && errno != EAGAIN) {
                                fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
                                return NULL;
@@ -440,6 +482,9 @@ static void *producer(void *input)
        int target;
        FILE *file;
 
+       printf("prod started, tid=%ld cpu=%d\n",
+              syscall(SYS_gettid), sched_getcpu());
+
        file = tmpfile();
        if (!file) {
                fprintf(stderr, "create file for sendfile");
@@ -554,6 +599,10 @@ static const struct argp_option opts[] = {
                "delay consumer start"},
        { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
                "producer duration"},
+       { "splice", ARG_CTL_SPLICE, NULL, 0,
+               "use splice instead of read for consumer"},
+       { "verify", ARG_CTL_VERIFY, NULL, 0,
+               "verify received data correctness"},
        {},
 };
 
@@ -572,6 +621,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
        case ARG_CTL_RX_STRP:
                ctx.strp_size = strtol(arg, NULL, 10);
                break;
+       case ARG_CTL_SPLICE:
+               ctx.use_splice = true;
+               break;
+       case ARG_CTL_VERIFY:
+               ctx.verify = true;
+               break;
        default:
                return ARGP_ERR_UNKNOWN;
        }
-- 
2.43.0


Reply via email to