On Mon, Jun 15, 2020 at 9:48 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Mon, Jun 15, 2020 at 09:29:29PM +0800, Hongtao Liu via Gcc-patches wrote:
> >   Basically i "copy" this optimization from clang i386 backend, Refer
> > to pr95524 for details.
> >   Bootstrap is ok, regression test on i386/x86-64 backend is ok.
> >
> > gcc/ChangeLog:
> >         PR target/95524
> >         * gcc/config/i386/i386-expand.c
> >         (ix86_expand_vec_shift_qihi_constant): New function.
>
> No gcc/ prefix in gcc/ChangeLog (git push would be refused).
> And with or without the prefix it fits, so:
>         * config/i386/i386-expand.c (ix86_expand_vec_shift_qihi_constant): New
>         function.
>
> >         * gcc/config/i386/i386-protos.h: Declare.
>
> This needs to mention the function name again.
>
> >         * gcc/config/i386/sse.md: Optimize shift V*QImode by constant.
>
> And thus needs to mention the define_expand, (<shift_insn><mode>3)
> in this case.
>
Changed.
> +  machine_mode qimode, himode;
> +  unsigned int shift_constant, and_constant, xor_constant;
> +  rtx vec_const_and, vec_const_xor;
> +  rtx tmp, op1_subreg;
> +  rtx (*gen_shift) (rtx, rtx, rtx);
> +  rtx (*gen_and) (rtx, rtx, rtx);
> +  rtx (*gen_xor) (rtx, rtx, rtx);
> +  rtx (*gen_sub) (rtx, rtx, rtx);
> +
> +  /* Only optimize shift by constant.  */
> +  if (!CONST_INT_P (op2))
> +    return false;
> +
> +  qimode = GET_MODE (dest);
> +  shift_constant = INTVAL (op2);
>
> I wonder if shift_constant shouldn't be unsigned HOST_WIDE_INT
i don't think there would be some usage with shift_constant greater
than UINT_MAX(4294967295),
shouldn't unsigned int enough here?
> instead, and which >= 8 values you should try to do something about rather
> than punt on optimizing.  If this is just from normal C shifts, then
That's why this optimization is only for ASHIFT/LSHIFTRT which shifts in 0s.
> perhaps anything >= 32U would be invalid, if it is from intrinsics,
> perhaps there is a different behavior (masked with something?).
We don't have intrinsic of simd int8 shift yet, so it's only normal C shifts.
>
> +  /* Shift constant greater equal 8 result into 0.  */
> +  if (shift_constant > 7)
> +    {
> +      if (code == ASHIFT || code == LSHIFTRT)
> +       {
> +         emit_move_insn (dest, CONST0_RTX (qimode));
> +         return true;
> +       }
> +      /* Sign bit not known.  */
> +      else if (code == ASHIFTRT)
> +       return false;
> +      else
> +       gcc_unreachable ();
> +    }
> +
> +  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
> +  /* Record sign bit.  */
> +  xor_constant = 1 << (8 - shift_constant - 1);
> +
> +  /* Zero upper/lower bits shift from left/right element.  */
> +  and_constant = code == ASHIFT ? 256 - (1 << shift_constant) :
> +    (1 << (8 - shift_constant)) - 1;
>
> Formatting.  Should be:
>   and_constant
>     = (code == ASHIFT ? 256 - (1 << shift_constant)
>                       : (1 << (8 - shift_constant)) - 1);
> or so.
Changed
> +
> +  switch (qimode)
> +    {
> +    case V16QImode:
> +      himode = V8HImode;
> +      gen_shift = (code == ASHIFT) ? gen_ashlv8hi3 :
> +       (code == ASHIFTRT) ? gen_ashrv8hi3 : gen_lshrv8hi3;
>
> Similarly.  : or ? should just never appear at the end of line.
> And probably no reason to wrap the comparisons in parens,
> instead wrap the whole expression.  So:
>       gen_shift
>         = (code == ASHIFT
>            ? gen_ashlv8hi3
>            : code == ASHIFTRT ? gen_ashrv8hi3 : gen_lshrv8hi3);
> or so?
>
Changed.
> +      gen_and = gen_andv16qi3;
> +      gen_xor = gen_xorv16qi3;
> +      gen_sub = gen_subv16qi3;
> +      break;
> +    case V32QImode:
> +      himode = V16HImode;
> +      gen_shift = (code == ASHIFT) ? gen_ashlv16hi3 :
> +       (code == ASHIFTRT) ? gen_ashrv16hi3 : gen_lshrv16hi3;
>
> Ditto.
>
Changed.
> +      gen_and = gen_andv32qi3;
> +      gen_xor = gen_xorv32qi3;
> +      gen_sub = gen_subv32qi3;
> +      break;
> +    case V64QImode:
> +      himode = V32HImode;
> +      gen_shift = (code == ASHIFT) ? gen_ashlv32hi3 :
> +       (code == ASHIFTRT) ? gen_ashrv32hi3 : gen_lshrv32hi3;
>
> Ditto.
>
Changed.
> +  tmp = gen_reg_rtx (himode);
> +  vec_const_and = gen_reg_rtx (qimode);
> +  op1_subreg = simplify_gen_subreg (himode, op1, qimode, 0);
>
> Just use lowpart_subreg?
Changed.
> +
> +  /* For ASHIFT and LSHIFTRT, perform operation like
> +     vpsllw/vpsrlw $shift_constant, %op1, %dest.
> +     vpand %vec_const_and, %dest.  */
> +  emit_insn (gen_shift (tmp, op1_subreg, op2));
> +  emit_move_insn (dest, simplify_gen_subreg (qimode, tmp, himode, 0));
> +  emit_move_insn (vec_const_and,
> +                 ix86_build_const_vector (qimode, true,
> +                                          GEN_INT (and_constant)));
> +  emit_insn (gen_and (dest, dest, vec_const_and));
> +
> +  /* For ASHIFTRT, perform extra operation like
> +     vpxor %vec_const_xor, %dest, %dest
> +     vpsubb %vec_const_xor, %dest, %dest  */
> +  if (code == ASHIFTRT)
> +    {
> +      vec_const_xor = gen_reg_rtx (qimode);
> +      emit_move_insn (vec_const_xor,
> +                     ix86_build_const_vector (qimode, true,
> +                                              GEN_INT (xor_constant)));
> +      emit_insn (gen_xor (dest, dest, vec_const_xor));
> +      emit_insn (gen_sub (dest, dest, vec_const_xor));
> +    }
> +  return true;
> +}
> +
>  /* Expand a vector operation CODE for a V*QImode in terms of the
>     same operation on V*HImode.  */
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index f5320494fa1..7c2ce618f3f 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -206,6 +206,7 @@ extern void ix86_expand_round_sse4 (rtx, rtx);
>
>  extern bool ix86_expand_vecmul_qihi (rtx, rtx, rtx);
>  extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
> +extern bool ix86_expand_vec_shift_qihi_constant (enum rtx_code, rtx, rtx, 
> rtx);
>
>  extern rtx ix86_split_stack_guard (void);
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index aa9fdc87c68..b466950af40 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -19863,6 +19863,9 @@
>        gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
>        emit_insn (gen (operands[0], operands[1], tmp));
>      }
> +  else if (ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
> +                                               operands[1], operands[2]))
> +    DONE;
>
> Perhaps do instead:
>   else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
>                                                  operands[1], operands[2]))
>     ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
>   DONE;
> ?
>
Changed.
>         Jakub
>


-- 
BR,
Hongtao

Reply via email to