ldimm64 is not only used for loading function addresses, and
the NOPs added for padding are impacting performance, so avoid
them when not necessary.
On QEMU mac99, with the patch:

test_bpf: #829 ALU64_MOV_K: all immediate value magnitudes jited:1 167436810 PASS
test_bpf: #831 ALU64_OR_K: all immediate value magnitudes jited:1 170702940 PASS

Without the patch:

test_bpf: #829 ALU64_MOV_K: all immediate value magnitudes jited:1 173012360 PASS
test_bpf: #831 ALU64_OR_K: all immediate value magnitudes jited:1 176424090 PASS

That's a 3.5% performance improvement.

Fixes: f9320c49993c ("powerpc/bpf: Update ldimm64 instructions during extra pass")
Signed-off-by: Christophe Leroy <christophe.le...@csgroup.eu>
---
 arch/powerpc/net/bpf_jit_comp.c   | 3 ++-
 arch/powerpc/net/bpf_jit_comp32.c | 5 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 5 +++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 43e634126514..206b698723a3 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -68,7 +68,8 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
                         * of the JITed sequence remains unchanged.
                         */
                        ctx->idx = tmp_idx;
-               } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+               } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+                          insn[i].src_reg == BPF_PSEUDO_FUNC) {
                        tmp_idx = ctx->idx;
                        ctx->idx = addrs[i] / 4;
 #ifdef CONFIG_PPC32
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index a379b0ce19ff..878f8a88d83e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -960,8 +960,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
                        PPC_LI32(dst_reg, (u32)insn[i].imm);
                        /* padding to allow full 4 instructions for later patching */
-                       for (j = ctx->idx - tmp_idx; j < 4; j++)
-                               EMIT(PPC_RAW_NOP());
+                       if (insn[i].src_reg == BPF_PSEUDO_FUNC)
+                               for (j = ctx->idx - tmp_idx; j < 4; j++)
+                                       EMIT(PPC_RAW_NOP());
                        /* Adjust for two bpf instructions */
                        addrs[++i] = ctx->idx * 4;
                        break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 29ee306d6302..af8bdb5553cd 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -938,8 +938,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        tmp_idx = ctx->idx;
                        PPC_LI64(dst_reg, imm64);
                        /* padding to allow full 5 instructions for later patching */
-                       for (j = ctx->idx - tmp_idx; j < 5; j++)
-                               EMIT(PPC_RAW_NOP());
+                       if (insn[i].src_reg == BPF_PSEUDO_FUNC)
+                               for (j = ctx->idx - tmp_idx; j < 5; j++)
+                                       EMIT(PPC_RAW_NOP());
                        /* Adjust for two bpf instructions */
                        addrs[++i] = ctx->idx * 4;
                        break;
-- 
2.38.1

Reply via email to