On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu...@intel.com> wrote:
> Since all 1s in TImode are a standard SSE2 constant, all 1s in OImode
> are a standard AVX2 constant, and all 1s in XImode are a standard
> AVX512F constant, pass the mode to standard_sse_constant_p and
> standard_sse_constant_opcode to check whether all 1s is available for
> the target.
>
> Tested on Linux/x86-64.  OK for master?
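For context, the constants in question are full-width all-ones integer
values, as in this made-up example (not taken from the patch or the PR;
the instruction named in the comment is the usual SSE2 idiom):

  __int128 x;

  void
  foo (void)
  {
    /* All 1s in TImode.  With SSE2 this can be materialized directly
       in a register (pcmpeqd %xmm0, %xmm0) instead of being loaded
       from the constant pool.  */
    x = ~(__int128) 0;
  }

The same applies to OImode with AVX2 and to XImode with AVX512F.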
No.  This patch should use the "isa" attribute instead of adding even
more similar patterns (a sketch of what that could look like follows at
the end of this message).  Also, please leave the MEM_P checks; the
rare C->m move can easily be resolved by IRA.  Also, the mode checks
should be in the predicate; standard_sse_constant_p should just
validate whether the constant is allowed by the ISA.

Uros.

> BTW, it will be used to fix
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155
>
>
> H.J.
> ---
>         * config/i386/i386-protos.h (standard_sse_constant_p): Take
>         machine_mode with VOIDmode as default.
>         * config/i386/i386.c (standard_sse_constant_p): Get mode if
>         it is VOIDmode.  Return 2 for all 1s of integer in supported
>         modes.
>         (ix86_expand_vector_move): Pass mode to standard_sse_constant_p.
>         * config/i386/i386.md (*movxi_internal_avx512f): Replace
>         vector_move_operand with nonimmediate_or_sse_const_operand and
>         use BC instead of C in constraint.  Check register_operand
>         instead of MEM_P.  Pass mode to standard_sse_constant_opcode.
>         (*movoi_internal_avx): Disabled for TARGET_AVX2.  Check
>         register_operand instead of MEM_P.
>         (*movoi_internal_avx2): New pattern.
>         (*movti_internal_sse): Likewise.
>         (*movti_internal): Renamed to ...
>         (*movti_internal_sse2): This.  Require SSE2.  Use BC instead of
>         C in constraint.  Check register_operand instead of MEM_P in
>         32-bit mode.
> ---
>  gcc/config/i386/i386-protos.h |   2 +-
>  gcc/config/i386/i386.c        |  27 ++++++++---
>  gcc/config/i386/i386.md       | 104 ++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 121 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index ff47bc1..cf54189 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void);
>  extern int standard_80387_constant_p (rtx);
>  extern const char *standard_80387_constant_opcode (rtx);
>  extern rtx standard_80387_constant_rtx (int);
> -extern int standard_sse_constant_p (rtx);
> +extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode);
>  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
>  extern bool symbolic_reference_mentioned_p (rtx);
>  extern bool extended_reg_mentioned_p (rtx);
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 6379313..dd951c2 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx)
>     in supported SSE/AVX vector mode.  */
>
>  int
> -standard_sse_constant_p (rtx x)
> +standard_sse_constant_p (rtx x, machine_mode mode)
>  {
> -  machine_mode mode;
> -
>    if (!TARGET_SSE)
>      return 0;
>
> -  mode = GET_MODE (x);
> -
> +  if (mode == VOIDmode)
> +    mode = GET_MODE (x);
> +
>    if (x == const0_rtx || x == CONST0_RTX (mode))
>      return 1;
> -  if (vector_all_ones_operand (x, mode))
> +  if (CONST_INT_P (x))
> +    {
> +      /* If mode != VOIDmode, standard_sse_constant_p must be called:
> +         1. On TImode with SSE2.
> +         2. On OImode with AVX2.
> +         3. On XImode with AVX512F.
> +       */
> +      if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1
> +          && (mode == VOIDmode
> +              || (mode == TImode && TARGET_SSE2)
> +              || (mode == OImode && TARGET_AVX2)
> +              || (mode == XImode && TARGET_AVX512F)))
> +        return 2;
> +    }
> +  else if (vector_all_ones_operand (x, mode))
>      switch (mode)
>        {
>        case V16QImode:
> @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
>        && (CONSTANT_P (op1)
>            || (SUBREG_P (op1)
>                && CONSTANT_P (SUBREG_REG (op1))))
> -      && !standard_sse_constant_p (op1))
> +      && !standard_sse_constant_p (op1, mode))
>      op1 = validize_mem (force_const_mem (mode, op1));
>
>    /* We need to check memory alignment for SSE mode since attribute
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index babd0a4..75227aa 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1971,8 +1971,10 @@
>
>  (define_insn "*movxi_internal_avx512f"
>    [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
> -        (match_operand:XI 1 "vector_move_operand" "C ,vm,v"))]
> -  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +        (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
> +  "TARGET_AVX512F
> +   && (register_operand (operands[0], XImode)
> +       || register_operand (operands[1], XImode))"
>  {
>    switch (which_alternative)
>      {
> @@ -1996,7 +1998,10 @@
>  (define_insn "*movoi_internal_avx"
>    [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
>          (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))]
> -  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +  "TARGET_AVX
> +   && !TARGET_AVX2
> +   && (register_operand (operands[0], OImode)
> +       || register_operand (operands[1], OImode))"
>  {
>    switch (get_attr_type (insn))
>      {
> @@ -2042,10 +2047,62 @@
>          ]
>          (const_string "OI")))])
>
> -(define_insn "*movti_internal"
> +(define_insn "*movoi_internal_avx2"
> +  [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
> +        (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
> +  "TARGET_AVX2
> +   && (register_operand (operands[0], OImode)
> +       || register_operand (operands[1], OImode))"
> +{
> +  switch (get_attr_type (insn))
> +    {
> +    case TYPE_SSELOG1:
> +      return standard_sse_constant_opcode (insn, operands[1]);
> +
> +    case TYPE_SSEMOV:
> +      if (misaligned_operand (operands[0], OImode)
> +          || misaligned_operand (operands[1], OImode))
> +        {
> +          if (get_attr_mode (insn) == MODE_V8SF)
> +            return "vmovups\t{%1, %0|%0, %1}";
> +          else if (get_attr_mode (insn) == MODE_XI)
> +            return "vmovdqu32\t{%1, %0|%0, %1}";
> +          else
> +            return "vmovdqu\t{%1, %0|%0, %1}";
> +        }
> +      else
> +        {
> +          if (get_attr_mode (insn) == MODE_V8SF)
> +            return "vmovaps\t{%1, %0|%0, %1}";
> +          else if (get_attr_mode (insn) == MODE_XI)
> +            return "vmovdqa32\t{%1, %0|%0, %1}";
> +          else
> +            return "vmovdqa\t{%1, %0|%0, %1}";
> +        }
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type" "sselog1,ssemov,ssemov")
> +   (set_attr "prefix" "vex")
> +   (set (attr "mode")
> +        (cond [(ior (match_operand 0 "ext_sse_reg_operand")
> +                    (match_operand 1 "ext_sse_reg_operand"))
> +                 (const_string "XI")
> +               (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> +                 (const_string "V8SF")
> +               (and (eq_attr "alternative" "2")
> +                    (match_test "TARGET_SSE_TYPELESS_STORES"))
> +                 (const_string "V8SF")
> +              ]
> +              (const_string "OI")))])
> +
> +(define_insn "*movti_internal_sse"
>    [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
>          (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))]
>    "(TARGET_64BIT || TARGET_SSE)
> +   && !TARGET_SSE2
>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
>    switch (get_attr_type (insn))
> @@ -2061,6 +2118,45 @@
>           to stack may result in unaligned memory access.  */
>        if (misaligned_operand (operands[0], TImode)
>            || misaligned_operand (operands[1], TImode))
> +        return "%vmovups\t{%1, %0|%0, %1}";
> +      else
> +        return "%vmovaps\t{%1, %0|%0, %1}";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> + [(set_attr "isa" "x64,x64,*,*,*")
> +  (set_attr "type" "multi,multi,sselog1,ssemov,ssemov")
> +  (set_attr "prefix" "orig")
> +  (set (attr "mode")
> +       (cond [(eq_attr "alternative" "0,1")
> +                (const_string "DI")]
> +             (const_string "TI")))])
> +
> +(define_insn "*movti_internal_sse2"
> +  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m")
> +        (match_operand:TI 1 "general_operand" "riFo,re,BC,vm,v"))]
> +  "TARGET_SSE2
> +   && ((TARGET_64BIT
> +        && !(MEM_P (operands[0]) && MEM_P (operands[1])))
> +       || (!TARGET_64BIT
> +           && (register_operand (operands[0], TImode)
> +               || register_operand (operands[1], TImode))))"
> +{
> +  switch (get_attr_type (insn))
> +    {
> +    case TYPE_MULTI:
> +      return "#";
> +
> +    case TYPE_SSELOG1:
> +      return standard_sse_constant_opcode (insn, operands[1]);
> +
> +    case TYPE_SSEMOV:
> +      /* TDmode values are passed as TImode on the stack.  Moving them
> +         to stack may result in unaligned memory access.  */
> +      if (misaligned_operand (operands[0], TImode)
> +          || misaligned_operand (operands[1], TImode))
>          {
>            if (get_attr_mode (insn) == MODE_V4SF)
>              return "%vmovups\t{%1, %0|%0, %1}";
> --
> 2.5.5
>
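For reference, a minimal sketch of the "isa" attribute approach
suggested above: keep a single pattern and enable the all-ones ("BC")
alternative only for AVX2 via the "isa"/"enabled" attribute machinery,
instead of adding a separate *movoi_internal_avx2 pattern.  This is
only an illustration of the shape, not a final patch; the output code
and the "mode" attribute are abbreviated:

(define_insn "*movoi_internal_avx"
  [(set (match_operand:OI 0 "nonimmediate_operand"              "=v,v ,v ,m")
        (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "C ,BC,vm,v"))]
  "TARGET_AVX
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  /* ... same output code as before; standard_sse_constant_opcode
     handles both the C (all 0s) and BC (all 1s) alternatives ... */
}
  ;; Alternative 1 (the BC constant) is enabled only for TARGET_AVX2;
  ;; the remaining alternatives are available whenever the insn is.
  [(set_attr "isa" "*,avx2,*,*")
   (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

The mode/ISA check itself would then live in the operand predicate
rather than being hard-wired into standard_sse_constant_p, roughly:

(define_predicate "nonimmediate_or_sse_const_operand"
  (ior (match_operand 0 "nonimmediate_operand")
       (match_test "standard_sse_constant_p (op, mode)")))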