On Mon, Jun 15, 2020 at 9:48 PM Jakub Jelinek <ja...@redhat.com> wrote: > > On Mon, Jun 15, 2020 at 09:29:29PM +0800, Hongtao Liu via Gcc-patches wrote: > > Basically i "copy" this optimization from clang i386 backend, Refer > > to pr95524 for details. > > Bootstrap is ok, regression test on i386/x86-64 backend is ok. > > > > gcc/ChangeLog: > > PR target/95524 > > * gcc/config/i386/i386-expand.c > > (ix86_expand_vec_shift_qihi_constant): New function. > > No gcc/ prefix in gcc/ChangeLog (git push would be refused). > And with or without the prefix it fits, so: > * config/i386/i386-expand.c (ix86_expand_vec_shift_qihi_constant): New > function. > > > * gcc/config/i386/i386-protos.h: Declare. > > This needs to mention the function name again. > > > * gcc/config/i386/sse.md: Optimize shift V*QImode by constant. > > And thus needs to mention the define_expand, (<shift_insn><mode>3) > in this case. > Changed. > + machine_mode qimode, himode; > + unsigned int shift_constant, and_constant, xor_constant; > + rtx vec_const_and, vec_const_xor; > + rtx tmp, op1_subreg; > + rtx (*gen_shift) (rtx, rtx, rtx); > + rtx (*gen_and) (rtx, rtx, rtx); > + rtx (*gen_xor) (rtx, rtx, rtx); > + rtx (*gen_sub) (rtx, rtx, rtx); > + > + /* Only optimize shift by constant. */ > + if (!CONST_INT_P (op2)) > + return false; > + > + qimode = GET_MODE (dest); > + shift_constant = INTVAL (op2); > > I wonder if shift_constant shouldn't be unsigned HOST_WIDE_INT i don't think there would be some usage with shift_constant greater than UINT_MAX(4294967295), shouldn't unsigned int enough here? > instead, and which >= 8 values you should try to do something about rather > than punt on optimizing. If this is just from normal C shifts, then That's why this optimization is only for ASHIFT/LSHIFTRT which shifts in 0s. > perhaps anything >= 32U would be invalid, if it is from intrinsics, > perhaps there is a different behavior (masked with something?). We don't have intrinsic of simd int8 shift yet, so it's only normal C shifts. > > + /* Shift constant greater equal 8 result into 0. */ > + if (shift_constant > 7) > + { > + if (code == ASHIFT || code == LSHIFTRT) > + { > + emit_move_insn (dest, CONST0_RTX (qimode)); > + return true; > + } > + /* Sign bit not known. */ > + else if (code == ASHIFTRT) > + return false; > + else > + gcc_unreachable (); > + } > + > + gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT); > + /* Record sign bit. */ > + xor_constant = 1 << (8 - shift_constant - 1); > + > + /* Zero upper/lower bits shift from left/right element. */ > + and_constant = code == ASHIFT ? 256 - (1 << shift_constant) : > + (1 << (8 - shift_constant)) - 1; > > Formatting. Should be: > and_constant > = (code == ASHIFT ? 256 - (1 << shift_constant) > : (1 << (8 - shift_constant)) - 1); > or so. Changed > + > + switch (qimode) > + { > + case V16QImode: > + himode = V8HImode; > + gen_shift = (code == ASHIFT) ? gen_ashlv8hi3 : > + (code == ASHIFTRT) ? gen_ashrv8hi3 : gen_lshrv8hi3; > > Similarly. : or ? should just never appear at the end of line. > And probably no reason to wrap the comparisons in parens, > instead wrap the whole expression. So: > gen_shift > = (code == ASHIFT > ? gen_ashlv8hi3 > : code == ASHIFTRT ? gen_ashrv8hi3 : gen_lshrv8hi3); > or so? > Changed. > + gen_and = gen_andv16qi3; > + gen_xor = gen_xorv16qi3; > + gen_sub = gen_subv16qi3; > + break; > + case V32QImode: > + himode = V16HImode; > + gen_shift = (code == ASHIFT) ? gen_ashlv16hi3 : > + (code == ASHIFTRT) ? gen_ashrv16hi3 : gen_lshrv16hi3; > > Ditto. > Changed. > + gen_and = gen_andv32qi3; > + gen_xor = gen_xorv32qi3; > + gen_sub = gen_subv32qi3; > + break; > + case V64QImode: > + himode = V32HImode; > + gen_shift = (code == ASHIFT) ? gen_ashlv32hi3 : > + (code == ASHIFTRT) ? gen_ashrv32hi3 : gen_lshrv32hi3; > > Ditto. > Changed. > + tmp = gen_reg_rtx (himode); > + vec_const_and = gen_reg_rtx (qimode); > + op1_subreg = simplify_gen_subreg (himode, op1, qimode, 0); > > Just use lowpart_subreg? Changed. > + > + /* For ASHIFT and LSHIFTRT, perform operation like > + vpsllw/vpsrlw $shift_constant, %op1, %dest. > + vpand %vec_const_and, %dest. */ > + emit_insn (gen_shift (tmp, op1_subreg, op2)); > + emit_move_insn (dest, simplify_gen_subreg (qimode, tmp, himode, 0)); > + emit_move_insn (vec_const_and, > + ix86_build_const_vector (qimode, true, > + GEN_INT (and_constant))); > + emit_insn (gen_and (dest, dest, vec_const_and)); > + > + /* For ASHIFTRT, perform extra operation like > + vpxor %vec_const_xor, %dest, %dest > + vpsubb %vec_const_xor, %dest, %dest */ > + if (code == ASHIFTRT) > + { > + vec_const_xor = gen_reg_rtx (qimode); > + emit_move_insn (vec_const_xor, > + ix86_build_const_vector (qimode, true, > + GEN_INT (xor_constant))); > + emit_insn (gen_xor (dest, dest, vec_const_xor)); > + emit_insn (gen_sub (dest, dest, vec_const_xor)); > + } > + return true; > +} > + > /* Expand a vector operation CODE for a V*QImode in terms of the > same operation on V*HImode. */ > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index f5320494fa1..7c2ce618f3f 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -206,6 +206,7 @@ extern void ix86_expand_round_sse4 (rtx, rtx); > > extern bool ix86_expand_vecmul_qihi (rtx, rtx, rtx); > extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx); > +extern bool ix86_expand_vec_shift_qihi_constant (enum rtx_code, rtx, rtx, > rtx); > > extern rtx ix86_split_stack_guard (void); > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index aa9fdc87c68..b466950af40 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -19863,6 +19863,9 @@ > gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); > emit_insn (gen (operands[0], operands[1], tmp)); > } > + else if (ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0], > + operands[1], operands[2])) > + DONE; > > Perhaps do instead: > else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0], > operands[1], operands[2])) > ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); > DONE; > ? > Changed. > Jakub >
-- BR, Hongtao