Before this patch, the code generated for test #359 ADD default X is:
0: nop
4: nop
8: std r27,-40(r1)
c: std r28,-32(r1)
10: xor r8,r8,r8
14: rotlwi r8,r8,0
18: xor r28,r28,r28
1c: rotlwi r28,r28,0
20: mr r27,r3
24: li r8,66
28: add r8,r8,r28
2c: rotlwi r8,r8,0
30: ld r27,-40(r1)
34: ld r28,-32(r1)
38: mr r3,r8
3c: blr
After this patch, the code for the same test becomes:
0: nop
4: nop
8: xor r8,r8,r8
c: rotlwi r8,r8,0
10: xor r5,r5,r5
14: rotlwi r5,r5,0
18: mr r4,r3
1c: li r8,66
20: add r8,r8,r5
24: rotlwi r8,r8,0
28: mr r3,r8
2c: blr
Signed-off-by: Jordan Niethe <jniet...@gmail.com>
---
arch/powerpc/net/bpf_jit64.h | 2 ++
arch/powerpc/net/bpf_jit_comp64.c | 60 +++++++++++++++++++++++++------
2 files changed, 52 insertions(+), 10 deletions(-)
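
For reference: the listings above show the two non-volatile registers r27/r28
(which cost the std/ld save/restore pairs) being remapped to the volatile
r4/r5, so the prologue/epilogue stack traffic disappears. Below is a minimal
userspace sketch of the remapping loop, only to illustrate the bit twiddling.
The mask values and the `seen` layout (GPR n tracked at bit 31 - n, as implied
by the 32 - fls() computation in the patch) are assumptions for the example,
not the kernel's definitions.

/*
 * Illustrative sketch only: nvreg_mask/vreg_mask values and the bit
 * layout of `seen` are assumed for this example (GPR n <-> bit 31 - n).
 */
#include <stdio.h>

static int fls(unsigned int x)	/* 1-based index of the highest set bit */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	unsigned int seen = 1u << (31 - 27) | 1u << (31 - 28);	/* r27, r28 in use */
	unsigned int nvreg_mask = 0x0003ffff;	/* r14..r31 (assumed) */
	unsigned int vreg_mask  = 0x0ff00000;	/* r4..r11 (assumed) */

	while (seen & nvreg_mask && (seen & vreg_mask) != vreg_mask) {
		int old = 32 - fls(seen & nvreg_mask);	/* lowest-numbered NVR still in use */
		int new = 32 - fls(~seen & vreg_mask);	/* lowest-numbered free volatile reg */

		printf("remap r%d -> r%d\n", old, new);	/* r27 -> r4, then r28 -> r5 */
		seen |= 1u << (31 - new);
		seen &= ~(1u << (31 - old));
	}
	return 0;
}

Note that the real bpf_jit_realloc_regs() below additionally gives up when
SEEN_FUNC is set, since volatile registers do not survive helper calls.
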
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 89b625d9342b..e20521bf77bf 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -70,6 +70,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
*/
 #define PPC_BPF_LL(ctx, r, base, i) do {				      \
 		if ((i) % 4) {						      \
+			bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
 			EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
 			EMIT(PPC_RAW_LDX(r, base,			      \
 					bpf_to_ppc(ctx, TMP_REG_2)));	      \
@@ -78,6 +79,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
 	} while(0)
 
 #define PPC_BPF_STL(ctx, r, base, i) do {				      \
 		if ((i) % 4) {						      \
+			bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
 			EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
 			EMIT(PPC_RAW_STDX(r, base,			      \
 					bpf_to_ppc(ctx, TMP_REG_2)));	      \
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index f7a668c1e364..287e0322bbf3 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -66,6 +66,24 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 void bpf_jit_realloc_regs(struct codegen_context *ctx)
 {
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	       (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & SEEN_NVREG_MASK);
+		int new = 32 - fls(~ctx->seen & SEEN_VREG_MASK);
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG_2; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			break;
+		}
+	}
 }
 
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
@@ -106,10 +124,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
* If we haven't created our own stack frame, we save these
* in the protected zone below the previous stack frame
*/
-	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
-			PPC_BPF_STL(ctx, bpf_to_ppc(ctx, i), 1,
-				    bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			PPC_BPF_STL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP)))
@@ -122,10 +139,9 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
int i;
/* Restore NVRs */
-	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
-			PPC_BPF_LL(ctx, bpf_to_ppc(ctx, i), 1,
-				   bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			PPC_BPF_LL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
@@ -223,6 +239,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* if (index >= array->map.max_entries)
* goto out;
*/
+ bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_1));
EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, TMP_REG_1), b2p_bpf_array,
offsetof(struct bpf_array, map.max_entries)));
EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
@@ -318,9 +335,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* optimization but everything else should work without
* any issues.
*/
- if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
+ if (dst_reg >= 3 && dst_reg < 32)
bpf_set_seen_register(ctx, dst_reg);
- if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
+ if (src_reg >= 3 && src_reg < 32)
bpf_set_seen_register(ctx, src_reg);
switch (code) {
@@ -345,6 +362,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			if (imm >= -32768 && imm < 32768)
 				EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
 			}
@@ -362,6 +380,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			if (imm >= -32768 && imm < 32768)
 				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				if (BPF_CLASS(code) == BPF_ALU)
 					EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
@@ -372,6 +391,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
if (BPF_OP(code) == BPF_MOD) {
+ bpf_set_seen_register(ctx, tmp1_reg);
EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -381,6 +401,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
if (BPF_OP(code) == BPF_MOD) {
+ bpf_set_seen_register(ctx, tmp1_reg);
EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -396,10 +417,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
else if (imm == 1)
goto bpf_alu32_trunc;
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
switch (BPF_CLASS(code)) {
case BPF_ALU:
if (BPF_OP(code) == BPF_MOD) {
+ bpf_set_seen_register(ctx, tmp2_reg);
 					EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
 					EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
 					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -408,6 +431,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_ALU64:
if (BPF_OP(code) == BPF_MOD) {
+ bpf_set_seen_register(ctx, tmp2_reg);
 					EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
 					EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
 					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -434,6 +458,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
else {
/* Sign-extended */
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
}
@@ -446,6 +471,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
/* Sign-extended */
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
} else {
@@ -463,6 +489,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
/* Sign-extended */
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
} else {
@@ -562,6 +589,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if (BPF_SRC(code) == BPF_FROM_LE)
goto emit_clear;
#endif
+ bpf_set_seen_register(ctx, tmp1_reg);
switch (imm) {
case 16:
/* Rotate 8 bits left & mask with 0x0000ff00 */
@@ -625,6 +653,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
+ bpf_set_seen_register(ctx, tmp1_reg);
EMIT(PPC_RAW_LI(tmp1_reg, imm));
src_reg = tmp1_reg;
}
@@ -633,6 +662,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
+ bpf_set_seen_register(ctx, tmp1_reg);
EMIT(PPC_RAW_LI(tmp1_reg, imm));
src_reg = tmp1_reg;
}
@@ -641,6 +671,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
src_reg = tmp1_reg;
}
@@ -649,6 +680,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
src_reg = tmp1_reg;
}
@@ -669,6 +701,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
/* *(u32 *)(dst + off) += src */
/* Get EA into TMP_REG_1 */
+ bpf_set_seen_register(ctx, tmp1_reg);
+ bpf_set_seen_register(ctx, tmp2_reg);
EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
tmp_idx = ctx->idx * 4;
/* load value from memory into TMP_REG_2 */
@@ -689,6 +723,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
}
/* *(u64 *)(dst + off) += src */
+ bpf_set_seen_register(ctx, tmp1_reg);
+ bpf_set_seen_register(ctx, tmp2_reg);
EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
tmp_idx = ctx->idx * 4;
EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0));
@@ -870,6 +906,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
+ bpf_set_seen_register(ctx, tmp1_reg);
if (BPF_CLASS(code) == BPF_JMP) {
 				EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
} else {
@@ -903,6 +940,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
} else {
/* sign-extending load */
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
/* ... but unsigned comparison */
if (is_jmp32)
@@ -933,6 +971,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				else
 					EMIT(PPC_RAW_CMPDI(dst_reg, imm));
} else {
+ bpf_set_seen_register(ctx, tmp1_reg);
PPC_LI32(tmp1_reg, imm);
if (is_jmp32)
 					EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
@@ -944,6 +983,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
+ bpf_set_seen_register(ctx, tmp1_reg);
if (imm >= 0 && imm < 32768)
/* PPC_ANDI is _only/always_ dot-form */
 				EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));