On Thu, Apr 17, 2025 at 01:10:37AM +0530, Hari Bathini wrote:
> arch_bpf_trampoline_size() provides the size of the BPF trampoline's
> JIT image before the buffer for JIT'ing it is allocated. The total
> number of instructions emitted for the BPF trampoline JIT code depends
> on where the final image is located. So, the size computed by the
> dummy pass in arch_bpf_trampoline_size() can differ from the size
> actually needed in arch_prepare_bpf_trampoline(). When the number of
> instructions accounted for in arch_bpf_trampoline_size() is less than
> the number emitted during the actual JIT compile of the trampoline,
> the below warning is produced:
> 
>   WARNING: CPU: 8 PID: 204190 at arch/powerpc/net/bpf_jit_comp.c:981 __arch_prepare_bpf_trampoline.isra.0+0xd2c/0xdcc
> 
> which is:
> 
>   /* Make sure the trampoline generation logic doesn't overflow */
>   if (image && WARN_ON_ONCE(&image[ctx->idx] >
>                       (u32 *)rw_image_end - BPF_INSN_SAFETY)) {
> 
> So, during the dummy pass, instead of providing some arbitrary image
> location, pass a NULL image and account for the maximum possible
> number of instructions wherever the emitted code depends on the image
> location.
> 
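For context: on powerpc, EMIT() is built on PLANT_INSTR(), which only
stores an instruction when a destination buffer exists, so a NULL image
naturally turns a codegen pass into a pure counting pass (paraphrased
from arch/powerpc/net/bpf_jit.h):

	#define PLANT_INSTR(d, idx, instr)				      \
		do { if (d) { (d)[idx] = instr; } idx++; } while (0)
	#define EMIT(instr)	PLANT_INSTR(image, ctx->idx, instr)

That is what makes the "!image" checks added below a reliable marker
for the dummy pass: the worst cases they account for are 2 instructions
for PPC_LI32 (lis + ori) and 5 for PPC_LI64.
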
> Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
> Reported-by: Venkat Rao Bagalkote <venka...@linux.ibm.com>
> Closes: https://lore.kernel.org/all/6168bfc8-659f-4b5a-a6fb-90a916dde...@linux.ibm.com/
> Cc: sta...@vger.kernel.org # v6.13+
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
> 
> Changes since v1:
> - Pass NULL for image during the initial pass and account for max.
>   possible instructions during this pass, as Naveen suggested.
> 
> 
>  arch/powerpc/net/bpf_jit.h        | 20 ++++++++++++++++---
>  arch/powerpc/net/bpf_jit_comp.c   | 33 ++++++++++---------------------
>  arch/powerpc/net/bpf_jit_comp64.c |  9 +++++++++
>  3 files changed, 36 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 6beacaec63d3..4c26912c2e3c 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -51,8 +51,16 @@
>               EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc));      \
>       } while (0)
>  
> -/* Sign-extended 32-bit immediate load */
> +/*
> + * Sign-extended 32-bit immediate load
> + *
> + * If this is a dummy pass (!image), account for
> + * maximum possible instructions.
> + */
>  #define PPC_LI32(d, i)               do {                                    \
> +     if (!image)                                                           \
> +             ctx->idx += 2;                                                \
> +     else {                                                                \
>               if ((int)(uintptr_t)(i) >= -32768 &&                          \
>                               (int)(uintptr_t)(i) < 32768)                  \
>                       EMIT(PPC_RAW_LI(d, i));                               \
> @@ -60,10 +68,15 @@
>                       EMIT(PPC_RAW_LIS(d, IMM_H(i)));                       \
>                       if (IMM_L(i))                                         \
>                               EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));            \
> -             } } while(0)
> +             }                                                             \
> +     } } while (0)
>  
>  #ifdef CONFIG_PPC64
> +/* If dummy pass (!image), account for maximum possible instructions */
>  #define PPC_LI64(d, i)               do {                                    \
> +     if (!image)                                                           \
> +             ctx->idx += 5;                                                \
> +     else {                                                                \
>               if ((long)(i) >= -2147483648 &&                               \
>                               (long)(i) < 2147483648)                       \
>                       PPC_LI32(d, i);                                       \
> @@ -84,7 +97,8 @@
>                       if ((uintptr_t)(i) & 0x000000000000ffffULL)           \
>                               EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
>                                                       0xffff));             \
> -             } } while (0)
> +             }                                                             \
> +     } } while (0)

You should now also be able to remove the padding we add in 
bpf_jit_comp64.c for 'case BPF_LD | BPF_IMM | BPF_DW:'
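
For reference, the padding in question looks roughly like this
(paraphrased, not verbatim):

	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
		imm64 = ((u64)(u32)insn[i].imm) |
			(((u64)(u32)insn[i + 1].imm) << 32);
		tmp_idx = ctx->idx;
		PPC_LI64(dst_reg, imm64);
		/* padding to allow full 5 instructions for later patching */
		if (!image)
			for (; tmp_idx + 5 > ctx->idx;)
				EMIT(PPC_RAW_NOP());

With PPC_LI64() now bumping ctx->idx by 5 itself whenever image is
NULL, that loop can never execute and becomes dead code.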

>  #define PPC_LI_ADDR  PPC_LI64
>  
>  #ifndef CONFIG_PPC_KERNEL_PCREL
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 2991bb171a9b..c0684733e9d6 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
>       EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
>       if (!p->jited)
>               PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
> -     if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
> -                        BRANCH_SET_LINK)) {
> -             if (image)
> -                     image[ctx->idx] = ppc_inst_val(branch_insn);
> +     /* Account for max possible instructions during dummy pass for size calculation */
> +     if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
> +                                 (unsigned long)p->bpf_func,
> +                                 BRANCH_SET_LINK)) {
> +             image[ctx->idx] = ppc_inst_val(branch_insn);
>               ctx->idx++;
>       } else {
>               EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
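
(With the "image &&" short-circuit, the dummy pass always takes the
else path above, i.e. the indirect call through the prog's bpf_func
pointer, presumably followed by the usual mtctr/bctrl, which is the
longer of the two alternatives, so the size estimate stays an upper
bound.)
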
> @@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>                       bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
>  
>               /* Reserve space to patch branch instruction to skip fexit progs */
> -             im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
> +             if (ro_image) /* image is NULL for dummy pass */
> +                     im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
>               EMIT(PPC_RAW_NOP());
>       }
>  
> @@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>               }
>  
>       if (flags & BPF_TRAMP_F_CALL_ORIG) {
> -             im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
> +             if (ro_image) /* image is NULL for dummy pass */
> +                     im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
>               PPC_LI_ADDR(_R3, im);
>               ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
>                                                (unsigned long)__bpf_tramp_exit);
> @@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
>                            struct bpf_tramp_links *tlinks, void *func_addr)
>  {
>       struct bpf_tramp_image im;
> -     void *image;
>       int ret;
>  
> -     /*
> -      * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
> -      * This will NOT cause fragmentation in direct map, as we do not
> -      * call set_memory_*() on this buffer.
> -      *
> -      * We cannot use kvmalloc here, because we need image to be in
> -      * module memory range.
> -      */
> -     image = bpf_jit_alloc_exec(PAGE_SIZE);
> -     if (!image)
> -             return -ENOMEM;
> -
> -     ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
> -                                         m, flags, tlinks, func_addr);
> -     bpf_jit_free_exec(image);
> -
> +     ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
>       return ret;
>  }
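
(A nice side effect: the sizing pass now runs entirely with NULL
buffers, so no executable scratch page has to be allocated and freed
just to measure the trampoline, and on success the value returned by
__arch_prepare_bpf_trampoline() is the computed image size.)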
>  
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 233703b06d7c..91f9efe8b8d7 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -225,6 +225,15 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
>       }
>  
>  #ifdef CONFIG_PPC_KERNEL_PCREL
> +     /*
> +      * If fimage is NULL (the initial pass to find image size),
> +      * account for the maximum no. of instructions possible.
> +      */
> +     if (!fimage) {
> +             ctx->idx += 7;
> +             return 0;
> +     }
> +

I would merge this with the below if conditional so that this gets 
noticed if the instruction sequence below ever changes.
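
One possible shape for that merge (a sketch only; the pla/paddi
branches are elided and the exact structure is of course up to Hari):

	reladdr = func_addr - local_paca->kernelbase;

	if (fimage && reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
		/* emit the short kernelbase-relative sequence as today */
	} else {
		/*
		 * Out-of-range target, or dummy pass (!fimage): the
		 * PPC_LI64() fallback self-accounts for the worst case
		 * (5 instructions) when image is NULL.
		 */
		PPC_LI64(_R12, func_addr);
	}
	EMIT(PPC_RAW_MTCTR(_R12));
	EMIT(PPC_RAW_BCTRL());

That way the dummy-pass count stays tied to the code that actually
emits the instructions (5 + mtctr + bctrl = 7), instead of living in a
hardcoded "ctx->idx += 7" that can silently go stale.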

>       reladdr = func_addr - local_paca->kernelbase;
>  
>       if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {

Other than that:
Acked-by: Naveen N Rao (AMD) <nav...@kernel.org>


- Naveen

