From: Jiong Wang <jiong.w...@netronome.com>

As we are doing JIT, we can afford to spend more time on the host computing
the advanced version of the reciprocal divide (reciprocal_value_adv) in
exchange for a shorter instruction sequence on the device.

We could reduce the required ALU instructions from 4 to 2 or 1.

Signed-off-by: Jiong Wang <jiong.w...@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicin...@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 64 ++++++++++++++++----
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 7c9ee3d85907..1d9e36835404 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1497,8 +1497,8 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 {
        swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
-       struct reciprocal_value rvalue;
-       swreg tmp_b = imm_b(nfp_prog);
+       struct reciprocal_value_adv rvalue;
+       u8 pre_shift, exp;
        swreg magic;
 
        if (imm > U32_MAX) {
@@ -1506,16 +1506,58 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
                return 0;
        }
 
-       rvalue = reciprocal_value(imm);
+       /* NOTE: "reciprocal_value_adv" doesn't support divisors greater than
+        * (1u << 31), so we need to JIT a separate NFP sequence to handle
+        * that case. For such divisors the quotient is simply the result of
+        * the unsigned comparison "dst >= imm", which can be computed using
+        * the following NFP sequence:
+        *
+        *  alu[--, dst, -, imm]
+        *  immed[imm, 0]
+        *  alu[dst, imm, +carry, 0]
+        *
+        */
+       if (imm > 1U << 31) {
+               swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+
+               emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
+               wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
+               emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
+                        reg_imm(0));
+               return 0;
+       }
+
+       rvalue = reciprocal_value_adv(imm, 32);
+       exp = rvalue.exp;
+       if (rvalue.is_wide_m && !(imm & 1)) {
+               pre_shift = fls(imm & -imm) - 1;
+               rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+       } else {
+               pre_shift = 0;
+       }
        magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
-       wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic,
-                   true);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh1);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh2);
+       if (imm == 1U << exp) {
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, exp);
+       } else if (rvalue.is_wide_m) {
+               wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
+                           magic, true);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, 1);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, rvalue.sh - 1);
+       } else {
+               if (pre_shift)
+                       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                                dst_b, SHF_SC_R_SHF, pre_shift);
+               wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                        dst_b, SHF_SC_R_SHF, rvalue.sh);
+       }
 
        return 0;
 }
-- 
2.17.1

Reply via email to