On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski <jakub.kicin...@netronome.com> wrote: > From: Jiong Wang <jiong.w...@netronome.com> > > NFP supports u16 and u32 multiplication. Multiplication is done 8-bits per > step, therefore we need 2 steps for u16 and 4 steps for u32. > > We also need one start instruction to initialize the sequence and one or > two instructions to fetch the result depending on either you need the high > halve of u32 multiplication. > > For ALU64, if either operand is beyond u32's value range, we reject it. One > thing to note, if the source operand is BPF_K, then we need to check "imm" > field directly, and we'd reject it if it is negative. Because for ALU64, > "imm" (with s32 type) is expected to be sign extended to s64 which NFP mul > doesn't support. For ALU32, it is fine for "imm" be negative though, > because the result is 32-bits and here is no difference on the low halve > of result for signed/unsigned mul, so we will get correct result. > > Signed-off-by: Jiong Wang <jiong.w...@netronome.com> > Reviewed-by: Jakub Kicinski <jakub.kicin...@netronome.com>
Acked-by: Song Liu <songliubrav...@fb.com> > --- > drivers/net/ethernet/netronome/nfp/bpf/jit.c | 137 ++++++++++++++++++ > drivers/net/ethernet/netronome/nfp/bpf/main.h | 5 + > .../net/ethernet/netronome/nfp/bpf/verifier.c | 58 ++++++-- > drivers/net/ethernet/netronome/nfp/nfp_asm.h | 28 ++++ > 4 files changed, 217 insertions(+), 11 deletions(-) > > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c > b/drivers/net/ethernet/netronome/nfp/bpf/jit.c > index 4a629e9b5c0f..7d7061d93358 100644 > --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c > +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c > @@ -415,6 +415,60 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst, > reg.dst_lmextn, reg.src_lmextn); > } > > +static void > +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, > + enum mul_type type, enum mul_step step, u16 breg, bool swap, > + bool wr_both, bool dst_lmextn, bool src_lmextn) > +{ > + u64 insn; > + > + insn = OP_MUL_BASE | > + FIELD_PREP(OP_MUL_A_SRC, areg) | > + FIELD_PREP(OP_MUL_B_SRC, breg) | > + FIELD_PREP(OP_MUL_STEP, step) | > + FIELD_PREP(OP_MUL_DST_AB, dst_ab) | > + FIELD_PREP(OP_MUL_SW, swap) | > + FIELD_PREP(OP_MUL_TYPE, type) | > + FIELD_PREP(OP_MUL_WR_AB, wr_both) | > + FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | > + FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); > + > + nfp_prog_push(nfp_prog, insn); > +} > + > +static void > +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, > + enum mul_step step, swreg rreg) > +{ > + struct nfp_insn_ur_regs reg; > + u16 areg; > + int err; > + > + if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { > + nfp_prog->error = -EINVAL; > + return; > + } > + > + if (step == MUL_LAST || step == MUL_LAST_2) { > + /* When type is step and step Number is LAST or LAST2, left > + * source is used as destination. > + */ > + err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); > + areg = reg.dst; > + } else { > + err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); > + areg = reg.areg; > + } > + > + if (err) { > + nfp_prog->error = err; > + return; > + } > + > + __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, > + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); > +} > + > static void > __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, > u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, > @@ -1380,6 +1434,65 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg > reg_in, u8 gpr_out) > SHF_SC_R_ROT, 16); > } > > +static void > +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg > lreg, > + swreg rreg, bool gen_high_half) > +{ > + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); > + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); > + if (gen_high_half) > + emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, > + reg_none()); > + else > + wrp_immed(nfp_prog, dst_hi, 0); > +} > + > +static void > +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg > lreg, > + swreg rreg) > +{ > + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); > + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); > + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); > +} > + > +static int > +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, > + bool gen_high_half, bool ropnd_from_reg) > +{ > + swreg multiplier, multiplicand, dst_hi, dst_lo; > + const struct bpf_insn *insn = &meta->insn; > + u32 lopnd_max, ropnd_max; > + u8 dst_reg; > + > + dst_reg = insn->dst_reg; > + multiplicand = reg_a(dst_reg * 2); > + dst_hi = reg_both(dst_reg * 2 + 1); > + dst_lo = reg_both(dst_reg * 2); > + lopnd_max = meta->umax_dst; > + if (ropnd_from_reg) { > + multiplier = reg_b(insn->src_reg * 2); > + ropnd_max = meta->umax_src; > + } else { > + u32 imm = insn->imm; > + > + multiplier = re_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); > + ropnd_max = imm; > + } > + if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) > + wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, > multiplier, > + gen_high_half); > + else > + wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, > multiplier); > + > + return 0; > +} > + > static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > { > swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); > @@ -1684,6 +1797,16 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct > nfp_insn_meta *meta) > return 0; > } > > +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > +{ > + return wrp_mul(nfp_prog, meta, true, true); > +} > + > +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > +{ > + return wrp_mul(nfp_prog, meta, true, false); > +} > + > static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > { > const struct bpf_insn *insn = &meta->insn; > @@ -2097,6 +2220,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct > nfp_insn_meta *meta) > return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); > } > > +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > +{ > + return wrp_mul(nfp_prog, meta, false, true); > +} > + > +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > +{ > + return wrp_mul(nfp_prog, meta, false, false); > +} > + > static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) > { > u8 dst = meta->insn.dst_reg * 2; > @@ -2848,6 +2981,8 @@ static const instr_cb_t instr_cb[256] = { > [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, > [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, > [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, > + [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, > + [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, > [BPF_ALU64 | BPF_NEG] = neg_reg64, > [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, > [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, > @@ -2867,6 +3002,8 @@ static const instr_cb_t instr_cb[256] = { > [BPF_ALU | BPF_ADD | BPF_K] = add_imm, > [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, > [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, > + [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, > + [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, > [BPF_ALU | BPF_NEG] = neg_reg, > [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, > [BPF_ALU | BPF_END | BPF_X] = end_reg32, > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h > b/drivers/net/ethernet/netronome/nfp/bpf/main.h > index c985d0ac61a3..c10079b1a312 100644 > --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h > +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h > @@ -394,6 +394,11 @@ static inline bool is_mbpf_xadd(const struct > nfp_insn_meta *meta) > return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); > } > > +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) > +{ > + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; > +} > + > /** > * struct nfp_prog - nfp BPF program > * @bpf: backpointer to the bpf app priv structure > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c > b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c > index 7bd9666bd8ff..30d4f1580693 100644 > --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c > +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c > @@ -516,6 +516,51 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct > nfp_insn_meta *meta, > return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); > } > > +static int > +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, > + struct bpf_verifier_env *env) > +{ > + const struct bpf_reg_state *sreg = > + cur_regs(env) + meta->insn.src_reg; > + const struct bpf_reg_state *dreg = > + cur_regs(env) + meta->insn.dst_reg; > + > + meta->umin_src = min(meta->umin_src, sreg->umin_value); > + meta->umax_src = max(meta->umax_src, sreg->umax_value); > + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); > + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); > + > + /* NFP supports u16 and u32 multiplication. > + * > + * For ALU64, if either operand is beyond u32's value range, we reject > + * it. One thing to note, if the source operand is BPF_K, then we need > + * to check "imm" field directly, and we'd reject it if it is > negative. > + * Because for ALU64, "imm" (with s32 type) is expected to be sign > + * extended to s64 which NFP mul doesn't support. > + * > + * For ALU32, it is fine for "imm" be negative though, because the > + * result is 32-bits and there is no difference on the low halve of > + * the result for signed/unsigned mul, so we will get correct result. > + */ > + if (is_mbpf_mul(meta)) { > + if (meta->umax_dst > U32_MAX) { > + pr_vlog(env, "multiplier is not within u32 value > range\n"); > + return -EINVAL; > + } > + if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) { > + pr_vlog(env, "multiplicand is not within u32 value > range\n"); > + return -EINVAL; > + } > + if (mbpf_class(meta) == BPF_ALU64 && > + mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { > + pr_vlog(env, "sign extended multiplicand won't be > within u32 value range\n"); > + return -EINVAL; > + } > + } > + > + return 0; > +} > + > static int > nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int > prev_insn_idx) > { > @@ -551,17 +596,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int > insn_idx, int prev_insn_idx) > if (is_mbpf_xadd(meta)) > return nfp_bpf_check_xadd(nfp_prog, meta, env); > > - if (is_mbpf_alu(meta)) { > - const struct bpf_reg_state *sreg = > - cur_regs(env) + meta->insn.src_reg; > - const struct bpf_reg_state *dreg = > - cur_regs(env) + meta->insn.dst_reg; > - > - meta->umin_src = min(meta->umin_src, sreg->umin_value); > - meta->umax_src = max(meta->umax_src, sreg->umax_value); > - meta->umin_dst = min(meta->umin_dst, dreg->umin_value); > - meta->umax_dst = max(meta->umax_dst, dreg->umax_value); > - } > + if (is_mbpf_alu(meta)) > + return nfp_bpf_check_alu(nfp_prog, meta, env); > > return 0; > } > diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h > b/drivers/net/ethernet/netronome/nfp/nfp_asm.h > index f6677bc9875a..cdc4e065f6f5 100644 > --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h > +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h > @@ -426,4 +426,32 @@ static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 > read_offset) > return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | > NFP_CSR_CTX_PTR; > } > > +enum mul_type { > + MUL_TYPE_START = 0x00, > + MUL_TYPE_STEP_24x8 = 0x01, > + MUL_TYPE_STEP_16x16 = 0x02, > + MUL_TYPE_STEP_32x32 = 0x03, > +}; > + > +enum mul_step { > + MUL_STEP_1 = 0x00, > + MUL_STEP_NONE = MUL_STEP_1, > + MUL_STEP_2 = 0x01, > + MUL_STEP_3 = 0x02, > + MUL_STEP_4 = 0x03, > + MUL_LAST = 0x04, > + MUL_LAST_2 = 0x05, > +}; > + > +#define OP_MUL_BASE 0x0f800000000ULL > +#define OP_MUL_A_SRC 0x000000003ffULL > +#define OP_MUL_B_SRC 0x000000ffc00ULL > +#define OP_MUL_STEP 0x00000700000ULL > +#define OP_MUL_DST_AB 0x00000800000ULL > +#define OP_MUL_SW 0x00040000000ULL > +#define OP_MUL_TYPE 0x00180000000ULL > +#define OP_MUL_WR_AB 0x20000000000ULL > +#define OP_MUL_SRC_LMEXTN 0x40000000000ULL > +#define OP_MUL_DST_LMEXTN 0x80000000000ULL > + > #endif > -- > 2.17.1 >