It seems my patch broke the CI on Debian/Ubuntu, but works on Fedora/CentOS. I was able to reproduce it with a container, and hope to provide a v3 this week.
Sorry for that! Christophe On Thu, Mar 19, 2026 at 12:45 PM Christophe Fontaine <[email protected]> wrote: > > arm64 jit compiler didn't support reading from a packet. > Enable arm64 JIT to generate native code for > (BPF_ABS | <size> | BPF_LD) and (BPF_IND | <size> | BPF_LD) > instructions. > > Both 'fast path' and 'slow path' are implemented, similar to > the x86_64 JIT, where we call '__rte_pktmbuf_read' if the > requested data is not in the first segment. > > Added unit test which focuses only on BPF_LD | BPF_ABS/BPF_IND and > a BPF_LD | BPF_ABS slow path call. > > Signed-off-by: Christophe Fontaine <[email protected]> > --- > app/test/test_bpf.c | 121 ++++++++++++++++++++++++++++++++++++++++ > lib/bpf/bpf_jit_arm64.c | 86 ++++++++++++++++++++++++++++ > 2 files changed, 207 insertions(+) > > diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c > index dd24722450..799f9d30ca 100644 > --- a/app/test/test_bpf.c > +++ b/app/test/test_bpf.c > @@ -2750,6 +2750,79 @@ static const struct rte_bpf_xsym test_call5_xsym[] = { > }, > }; > > +/* load mbuf (BPF_ABS/BPF_IND) test-cases */ > +static const struct ebpf_insn test_ld_mbuf0_prog[] = { > + /* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */ > + { > + .code = (EBPF_ALU64 | EBPF_MOV | BPF_X), > + .dst_reg = EBPF_REG_6, > + .src_reg = EBPF_REG_1, > + }, > + /* load IPv4 version and IHL */ > + { > + .code = (BPF_LD | BPF_ABS | BPF_B), > + .imm = offsetof(struct rte_ipv4_hdr, version_ihl), > + }, > + { > + .code = (BPF_JMP | EBPF_EXIT), > + }, > +}; > + > +/* load mbuf (BPF_ABS/BPF_IND) test-cases */ > +static const struct ebpf_insn test_ld_slow_mbuf0_prog[] = { > + /* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */ > + { > + .code = (EBPF_ALU64 | EBPF_MOV | BPF_X), > + .dst_reg = EBPF_REG_6, > + .src_reg = EBPF_REG_1, > + }, > + /* load from chained mbuf */ > + { > + .code = (BPF_LD | BPF_ABS | BPF_B), > + /* 201: second mbuf, built by test_ld_mbuf1_prepare */ > + .imm = 201 + 0x42, > + }, > + { > + .code = 
(BPF_JMP | EBPF_EXIT), > + }, > +}; > + > +static const struct ebpf_insn test_ld_ind_mbuf0_prog[] = { > + /* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */ > + { > + .code = (EBPF_ALU64 | EBPF_MOV | BPF_X), > + .dst_reg = EBPF_REG_6, > + .src_reg = EBPF_REG_1, > + }, > + { > + /* Set return value to one. */ > + .code = (EBPF_ALU64 | EBPF_MOV | BPF_K), > + .dst_reg = EBPF_REG_0, > + .imm = 0, > + }, > + /* load IPv4 version and IHL */ > + { > + .code = (BPF_LD | BPF_IND | BPF_B), > + .src_reg = EBPF_REG_0, > + .imm = offsetof(struct rte_ipv4_hdr, version_ihl), > + }, > + { > + .code = (BPF_JMP | EBPF_EXIT), > + }, > +}; > + > +static int > +test_ld_mbuf0_check(uint64_t rc, const void *arg) > +{ > + return cmp_res(__func__, 0x45, rc, arg, arg, 0); > +} > + > +static int > +test_ld_slow_mbuf0_check(uint64_t rc, const void *arg) > +{ > + return cmp_res(__func__, 0x42, rc, arg, arg, 0); > +} > + > /* load mbuf (BPF_ABS/BPF_IND) test-cases */ > static const struct ebpf_insn test_ld_mbuf1_prog[] = { > > @@ -3417,6 +3490,54 @@ static const struct bpf_test tests[] = { > /* for now don't support function calls on 32 bit platform */ > .allow_fail = (sizeof(uint64_t) != sizeof(uintptr_t)), > }, > + { > + .name = "test_ld_abs_mbuf0", > + .arg_sz = sizeof(struct dummy_mbuf), > + .prm = { > + .ins = test_ld_mbuf0_prog, > + .nb_ins = RTE_DIM(test_ld_mbuf0_prog), > + .prog_arg = { > + .type = RTE_BPF_ARG_PTR_MBUF, > + .buf_size = sizeof(struct dummy_mbuf), > + }, > + }, > + .prepare = test_ld_mbuf1_prepare, > + .check_result = test_ld_mbuf0_check, > + /* mbuf as input argument is not supported on 32 bit platform > */ > + .allow_fail = (sizeof(uint64_t) != sizeof(uintptr_t)), > + }, > + { > + .name = "test_ld_slow_mbuf0", > + .arg_sz = sizeof(struct dummy_mbuf), > + .prm = { > + .ins = test_ld_slow_mbuf0_prog, > + .nb_ins = RTE_DIM(test_ld_slow_mbuf0_prog), > + .prog_arg = { > + .type = RTE_BPF_ARG_PTR_MBUF, > + .buf_size = sizeof(struct dummy_mbuf), > + }, > + }, > + .prepare 
= test_ld_mbuf1_prepare, > + .check_result = test_ld_slow_mbuf0_check, > + /* mbuf as input argument is not supported on 32 bit platform > */ > + .allow_fail = (sizeof(uint64_t) != sizeof(uintptr_t)), > + }, > + { > + .name = "test_ld_ind_mbuf0", > + .arg_sz = sizeof(struct dummy_mbuf), > + .prm = { > + .ins = test_ld_ind_mbuf0_prog, > + .nb_ins = RTE_DIM(test_ld_ind_mbuf0_prog), > + .prog_arg = { > + .type = RTE_BPF_ARG_PTR_MBUF, > + .buf_size = sizeof(struct dummy_mbuf), > + }, > + }, > + .prepare = test_ld_mbuf1_prepare, > + .check_result = test_ld_mbuf0_check, > + /* mbuf as input argument is not supported on 32 bit platform > */ > + .allow_fail = (sizeof(uint64_t) != sizeof(uintptr_t)), > + }, > { > .name = "test_ld_mbuf1", > .arg_sz = sizeof(struct dummy_mbuf), > diff --git a/lib/bpf/bpf_jit_arm64.c b/lib/bpf/bpf_jit_arm64.c > index 099822e9f1..5b3b80cb86 100644 > --- a/lib/bpf/bpf_jit_arm64.c > +++ b/lib/bpf/bpf_jit_arm64.c > @@ -8,6 +8,7 @@ > > #include <rte_common.h> > #include <rte_byteorder.h> > +#include <rte_mbuf.h> > > #include "bpf_impl.h" > > @@ -1123,6 +1124,75 @@ emit_branch(struct a64_jit_ctx *ctx, uint8_t op, > uint32_t i, int16_t off) > emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off)); > } > > +/* > + * Emit code for BPF_LD | BPF_ABS/IND: load from packet. > + * Implements both a fast path, which computes the offset and read directly > + * and a slow path, which calls __rte_pktmbuf_read(mbuf, off, len, buf) > + * when the data is not in the first segment. 
> + */ > +static void > +emit_ld_mbuf(struct a64_jit_ctx *ctx, uint32_t op, uint8_t tmp1, uint8_t > tmp2, > + uint8_t src, int32_t imm) > +{ > + uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0); > + uint8_t r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6); > + uint32_t mode = BPF_MODE(op); > + uint32_t opsz = BPF_SIZE(op); > + uint32_t sz = bpf_size(opsz); > + > + /* r0 = mbuf (R6) */ > + emit_mov_64(ctx, A64_R(0), r6); > + > + /* r1 = off: for ABS use imm, for IND use src + imm */ > + if (mode == BPF_ABS) { > + emit_mov_imm(ctx, 0, A64_R(1), imm); > + } else { > + emit_mov_imm(ctx, 0, tmp2, imm); > + emit_add(ctx, 0, tmp2, src); > + emit_mov_64(ctx, A64_R(1), tmp2); > + } > + > + /* r2 = len, 1/2/4 bytes */ > + emit_mov_imm32(ctx, 0, A64_R(2), sz); > + /* r3 = buf (SP) */ > + emit_mov_64(ctx, A64_R(3), A64_SP); > + > + /* tmp1 = mbuf->data_len */ > + emit_mov_imm(ctx, 1, tmp1, offsetof(struct rte_mbuf, data_len)); > + emit_ldr(ctx, BPF_W, tmp1, r6, tmp1); > + > + /* tmp2 = off + sz */ > + emit_add_imm_64(ctx, tmp2, A64_R(1), sz); > + /* if off+sz > data_len, jump to slow path */ > + emit_cmp(ctx, 1, tmp2, tmp1); > + emit_b_cond(ctx, A64_HI, 8); > + > + /* Fast path, read directly, pointer to the data will be in A64_R(0) > */ > + /* A64_R(0) = mbuf->buf_addr */ > + emit_mov_imm(ctx, 1, tmp1, offsetof(struct rte_mbuf, buf_addr)); > + emit_ldr(ctx, EBPF_DW, A64_R(0), r6, tmp1); > + /* tmp2 = * mbuf->data_off */ > + emit_mov_imm(ctx, 1, tmp2, offsetof(struct rte_mbuf, data_off)); > + emit_ldr(ctx, BPF_H, tmp2, r6, tmp2); > + > + /* A64_R(0) += data_off + off */ > + emit_add(ctx, 1, A64_R(0), tmp2); > + emit_add(ctx, 1, A64_R(0), A64_R(1)); > + > + /* End of Fast Path, skip slow path */ > + emit_b(ctx, 4); > + > + /* slow path, call __rte_pktmbuf_read */ > + emit_call(ctx, tmp1, __rte_pktmbuf_read); > + /* check return value of __rte_pktmbuf_read */ > + emit_return_zero_if_src_zero(ctx, 1, A64_R(0)); > + > + /* A64_R(0) points to the data, load 1/2/4 bytes into r0*/ > + 
emit_ldr(ctx, opsz, r0, A64_R(0), A64_ZR); > + if (sz != sizeof(uint8_t)) > + emit_be(ctx, r0, sz * CHAR_BIT); > +} > + > static void > check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf) > { > @@ -1137,6 +1207,13 @@ check_program_has_call(struct a64_jit_ctx *ctx, struct > rte_bpf *bpf) > switch (op) { > /* Call imm */ > case (BPF_JMP | EBPF_CALL): > + /* BPF_LD | BPF_ABS/IND use __rte_pktmbuf_read */ > + case (BPF_LD | BPF_ABS | BPF_B): > + case (BPF_LD | BPF_ABS | BPF_H): > + case (BPF_LD | BPF_ABS | BPF_W): > + case (BPF_LD | BPF_IND | BPF_B): > + case (BPF_LD | BPF_IND | BPF_H): > + case (BPF_LD | BPF_IND | BPF_W): > ctx->foundcall = 1; > return; > } > @@ -1338,6 +1415,15 @@ emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf) > emit_mov_imm(ctx, 1, dst, u64); > i++; > break; > + /* load absolute/indirect from packet */ > + case (BPF_LD | BPF_ABS | BPF_B): > + case (BPF_LD | BPF_ABS | BPF_H): > + case (BPF_LD | BPF_ABS | BPF_W): > + case (BPF_LD | BPF_IND | BPF_B): > + case (BPF_LD | BPF_IND | BPF_H): > + case (BPF_LD | BPF_IND | BPF_W): > + emit_ld_mbuf(ctx, op, tmp1, tmp2, src, imm); > + break; > /* *(size *)(dst + off) = src */ > case (BPF_STX | BPF_MEM | BPF_B): > case (BPF_STX | BPF_MEM | BPF_H): > -- > 2.53.0 >

