Since all 1s in TImode is a standard SSE2 constant, all 1s in OImode is a
standard AVX2 constant, and all 1s in XImode is a standard AVX512F constant,
pass the mode to standard_sse_constant_p and standard_sse_constant_opcode to
check whether all 1s is available for the target.
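
For reference, here is a minimal standalone sketch (not part of the patch) of
the availability check that the new mode argument enables.  The fixed TARGET_*
values and the all_ones_constant_ok helper are stand-ins invented for
illustration; the opcodes named in the comment are the usual ways of
materializing all 1s at each width:

/* Hypothetical standalone model of the new check in
   standard_sse_constant_p; TARGET_* are stand-ins for the real
   target macros, fixed here for demonstration.  */
#include <stdbool.h>
#include <stdio.h>

#define TARGET_SSE2    1
#define TARGET_AVX2    1
#define TARGET_AVX512F 0

/* Return true if an all 1s integer constant of the given bit width
   can be materialized directly (e.g. pcmpeqd for 128 bits, vpcmpeqd
   for 256 bits, vpternlogd for 512 bits) instead of being loaded
   from the constant pool.  */
static bool
all_ones_constant_ok (int bits)
{
  switch (bits)
    {
    case 128: return TARGET_SSE2;    /* TImode */
    case 256: return TARGET_AVX2;    /* OImode */
    case 512: return TARGET_AVX512F; /* XImode */
    default:  return false;
    }
}

int
main (void)
{
  printf ("TImode: %d, OImode: %d, XImode: %d\n",
          all_ones_constant_ok (128),
          all_ones_constant_ok (256),
          all_ones_constant_ok (512));
  return 0;
}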
Tested on Linux/x86-64.  OK for master?

BTW, this will be used to fix

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155

H.J.
---
	* config/i386/i386-protos.h (standard_sse_constant_p): Take a
	machine_mode argument with VOIDmode as the default.
	* config/i386/i386.c (standard_sse_constant_p): Get the mode
	from X if it is VOIDmode.  Return 2 for all 1s integer
	constants in supported modes.
	(ix86_expand_vector_move): Pass mode to standard_sse_constant_p.
	* config/i386/i386.md (*movxi_internal_avx512f): Replace
	vector_move_operand with nonimmediate_or_sse_const_operand and
	use BC instead of C in constraint.  Check register_operand
	instead of MEM_P.  Pass mode to standard_sse_constant_opcode.
	(*movoi_internal_avx): Disable for TARGET_AVX2.  Check
	register_operand instead of MEM_P.
	(*movoi_internal_avx2): New pattern.
	(*movti_internal_sse): Likewise.
	(*movti_internal): Renamed to ...
	(*movti_internal_sse2): This.  Require SSE2.  Use BC instead of
	C in constraint.  Check register_operand instead of MEM_P in
	32-bit mode.
---
 gcc/config/i386/i386-protos.h |   2 +-
 gcc/config/i386/i386.c        |  27 ++++++++---
 gcc/config/i386/i386.md       | 104 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 121 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ff47bc1..cf54189 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void);
 extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
 extern rtx standard_80387_constant_rtx (int);
-extern int standard_sse_constant_p (rtx);
+extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode);
 extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
 extern bool symbolic_reference_mentioned_p (rtx);
 extern bool extended_reg_mentioned_p (rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6379313..dd951c2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx)
    in supported SSE/AVX vector mode.  */
 int
-standard_sse_constant_p (rtx x)
+standard_sse_constant_p (rtx x, machine_mode mode)
 {
-  machine_mode mode;
-
   if (!TARGET_SSE)
     return 0;
 
-  mode = GET_MODE (x);
-
+  if (mode == VOIDmode)
+    mode = GET_MODE (x);
+
   if (x == const0_rtx || x == CONST0_RTX (mode))
     return 1;
 
-  if (vector_all_ones_operand (x, mode))
+  if (CONST_INT_P (x))
+    {
+      /* If mode != VOIDmode, standard_sse_constant_p must be called:
+	 1. On TImode with SSE2.
+	 2. On OImode with AVX2.
+	 3. On XImode with AVX512F.
+       */
+      if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1
+	  && (mode == VOIDmode
+	      || (mode == TImode && TARGET_SSE2)
+	      || (mode == OImode && TARGET_AVX2)
+	      || (mode == XImode && TARGET_AVX512F)))
+	return 2;
+    }
+  else if (vector_all_ones_operand (x, mode))
     switch (mode)
       {
       case V16QImode:
@@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
       && (CONSTANT_P (op1)
	  || (SUBREG_P (op1)
	      && CONSTANT_P (SUBREG_REG (op1))))
-      && !standard_sse_constant_p (op1))
+      && !standard_sse_constant_p (op1, mode))
     op1 = validize_mem (force_const_mem (mode, op1));
 
   /* We need to check memory alignment for SSE mode since attribute
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index babd0a4..75227aa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1971,8 +1971,10 @@
 
 (define_insn "*movxi_internal_avx512f"
   [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
-	(match_operand:XI 1 "vector_move_operand" "C ,vm,v"))]
-  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+	(match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX512F
+   && (register_operand (operands[0], XImode)
+       || register_operand (operands[1], XImode))"
 {
   switch (which_alternative)
     {
@@ -1996,7 +1998,10 @@
 (define_insn "*movoi_internal_avx"
   [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
	(match_operand:OI 1 "vector_move_operand" "C ,vm,v"))]
-  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_AVX
+   && !TARGET_AVX2
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
 {
   switch (get_attr_type (insn))
     {
@@ -2042,10 +2047,62 @@
	      ]
	      (const_string "OI")))])
 
-(define_insn "*movti_internal"
+(define_insn "*movoi_internal_avx2"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
+	(match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX2
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_SSELOG1:
+      return standard_sse_constant_opcode (insn, operands[1]);
+
+    case TYPE_SSEMOV:
+      if (misaligned_operand (operands[0], OImode)
+	  || misaligned_operand (operands[1], OImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V8SF)
+	    return "vmovups\t{%1, %0|%0, %1}";
+	  else if (get_attr_mode (insn) == MODE_XI)
+	    return "vmovdqu32\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V8SF)
+	    return "vmovaps\t{%1, %0|%0, %1}";
+	  else if (get_attr_mode (insn) == MODE_XI)
+	    return "vmovdqa32\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovdqa\t{%1, %0|%0, %1}";
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set (attr "mode")
+	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
+		    (match_operand 1 "ext_sse_reg_operand"))
+		 (const_string "XI")
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "V8SF")
+	       (and (eq_attr "alternative" "2")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "V8SF")
+	      ]
+	      (const_string "OI")))])
+
+(define_insn "*movti_internal_sse"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
	(match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))]
   "(TARGET_64BIT || TARGET_SSE)
+   && !TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2061,6 +2118,45 @@
	 to stack may result in unaligned memory access.  */
       if (misaligned_operand (operands[0], TImode)
	  || misaligned_operand (operands[1], TImode))
+	return "%vmovups\t{%1, %0|%0, %1}";
+      else
+	return "%vmovaps\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "x64,x64,*,*,*")
+   (set_attr "type" "multi,multi,sselog1,ssemov,ssemov")
+   (set_attr "prefix" "orig")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "0,1")
+		 (const_string "DI")]
+	      (const_string "TI")))])
+
+(define_insn "*movti_internal_sse2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m")
+	(match_operand:TI 1 "general_operand" "riFo,re,BC,vm,v"))]
+  "TARGET_SSE2
+   && ((TARGET_64BIT
+	&& !(MEM_P (operands[0]) && MEM_P (operands[1])))
+       || (!TARGET_64BIT
+	   && (register_operand (operands[0], TImode)
+	       || register_operand (operands[1], TImode))))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_SSELOG1:
+      return standard_sse_constant_opcode (insn, operands[1]);
+
+    case TYPE_SSEMOV:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
	{
	  if (get_attr_mode (insn) == MODE_V4SF)
	    return "%vmovups\t{%1, %0|%0, %1}";
-- 
2.5.5