On Thu, Apr 21, 2016 at 9:42 AM, Uros Bizjak <ubiz...@gmail.com> wrote: > On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubiz...@gmail.com> wrote: >> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu...@intel.com> wrote: >>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is >>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants, >>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode >>> to check if all 1s is available for target. >>> >>> Tested on Linux/x86-64. OK for master? >> >> No. >> >> This patch should use "isa" attribute instead of adding even more >> similar patterns. Also, please leave MEM_P checks, the rare C->m move >> can be easily resolved by IRA. > > Actually, register_operand checks are indeed better, please disregard > MEM_P recommendation.
So, something like the attached untested RFC proto-patch, which still lacks wide-int handling. Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0687701..572f5bf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10777,7 +10777,23 @@ standard_sse_constant_p (rtx x) if (x == const0_rtx || x == CONST0_RTX (mode)) return 1; - if (vector_all_ones_operand (x, mode)) + + else if (CONST_INT_P (x)) + { + if (INTVAL (X) == HOST_WIDE_INT_M1 + && TARGET_SSE2) + return 2; + } + else if (CONST_WIDE_INT_P (x)) + { + if (.... something involving wi::minus-one .... + && TARGET_AVX2) + return 2; + if (.... + && TARGET_AVX512F) + return 2; + } + else if (vector_all_ones_operand (x, mode)) switch (mode) { case V16QImode: @@ -10811,53 +10827,70 @@ standard_sse_constant_p (rtx x) const char * standard_sse_constant_opcode (rtx_insn *insn, rtx x) { + machine_mode insn_mode = get_attr_mode (insn); + switch (standard_sse_constant_p (x)) { case 1: - switch (get_attr_mode (insn)) + switch (insn_mode) { case MODE_XI: return "vpxord\t%g0, %g0, %g0"; - case MODE_V16SF: - return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0" - : "vpxord\t%g0, %g0, %g0"; - case MODE_V8DF: - return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0" - : "vpxorq\t%g0, %g0, %g0"; + case MODE_OI: + return (TARGET_AVX512VL + ? "vpxord\t%x0, %x0, %x0" + : "vpxor\t%x0, %x0, %x0"); case MODE_TI: - return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0" - : "%vpxor\t%0, %d0"; - case MODE_V2DF: - return "%vxorpd\t%0, %d0"; - case MODE_V4SF: - return "%vxorps\t%0, %d0"; + return (TARGET_AVX512VL + ? "vpxord\t%t0, %t0, %t0" + : "%vpxor\t%0, %d0"); - case MODE_OI: - return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0" - : "vpxor\t%x0, %x0, %x0"; + case MODE_V8DF: + return (TARGET_AVX512DQ + ? "vxorpd\t%g0, %g0, %g0" + : "vpxorq\t%g0, %g0, %g0"); case MODE_V4DF: return "vxorpd\t%x0, %x0, %x0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + + case MODE_V16SF: + return (TARGET_AVX512DQ + ? 
"vxorps\t%g0, %g0, %g0" + : "vpxord\t%g0, %g0, %g0"); case MODE_V8SF: return "vxorps\t%x0, %x0, %x0"; + case MODE_V4SF: + return "%vxorps\t%0, %d0"; default: break; } case 2: - if (TARGET_AVX512VL - || get_attr_mode (insn) == MODE_XI - || get_attr_mode (insn) == MODE_V8DF - || get_attr_mode (insn) == MODE_V16SF) - return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; - if (TARGET_AVX) - return "vpcmpeqd\t%0, %0, %0"; - else - return "pcmpeqd\t%0, %0"; + switch (GET_MODE_SIZE (insn_mode)) + { + case 64: + gcc_assert (TARGET_AVX512F); + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + case 32: + gcc_assert (TARGET_AVX2); + return (TARGET_AVX512VL + ? "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + : "vpcmpeqd\t%0, %0, %0"); + case 16: + gcc_assert (TARGET_SSE2); + return (TARGET_AVX512VL + ? "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + : "pcmpeqd\t%0, %0"); + default: + break; + } default: break; } + gcc_unreachable (); } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 38eb98c..3337968 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1970,9 +1970,11 @@ (set_attr "length_immediate" "1")]) (define_insn "*movxi_internal_avx512f" - [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m") - (match_operand:XI 1 "vector_move_operand" "C ,vm,v"))] - "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m") + (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] + "TARGET_AVX512F + && (register_operand (operands[0], XImode) + || register_operand (operands[1], XImode))" { switch (which_alternative) { @@ -1994,9 +1996,11 @@ (set_attr "mode" "XI")]) (define_insn "*movoi_internal_avx" - [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m") - (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))] - "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + [(set (match_operand:OI 0 
"nonimmediate_operand" "=v,v,v ,m") + (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,C,vm,v"))] + "TARGET_AVX + && (register_operand (operands[0], OImode) + || register_operand (operands[1], OImode))" { switch (get_attr_type (insn)) { @@ -2028,7 +2032,8 @@ gcc_unreachable (); } } - [(set_attr "type" "sselog1,ssemov,ssemov") + [(set_attr "isa" "avx2,*,*,*") + (set_attr "type" "sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "vex") (set (attr "mode") (cond [(ior (match_operand 0 "ext_sse_reg_operand") @@ -2036,17 +2041,21 @@ (const_string "XI") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V8SF") - (and (eq_attr "alternative" "2") + (and (eq_attr "alternative" "3") (match_test "TARGET_SSE_TYPELESS_STORES")) (const_string "V8SF") ] (const_string "OI")))]) (define_insn "*movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m") - (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))] - "(TARGET_64BIT || TARGET_SSE) - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v ,v,v ,m") + (match_operand:TI 1 "general_operand" "riFo,re,BC,C,vm,v"))] + "(TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))) + || (TARGET_SSE + && nonimmediate_or_sse_const_operand (operands[1], TImode) + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode)))" { switch (get_attr_type (insn)) { @@ -2083,8 +2092,8 @@ gcc_unreachable (); } } - [(set_attr "isa" "x64,x64,*,*,*") - (set_attr "type" "multi,multi,sselog1,ssemov,ssemov") + [(set_attr "isa" "x64,x64,sse2,*,*,*") + (set_attr "type" "multi,multi,sselog1,sselog1,ssemov,ssemov") (set (attr "prefix") (if_then_else (eq_attr "type" "sselog1,ssemov") (const_string "maybe_vex") @@ -2098,7 +2107,7 @@ (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") - (and (eq_attr "alternative" "4") + (and (eq_attr "alternative" "5") 
(match_test "TARGET_SSE_TYPELESS_STORES")) (const_string "V4SF") (match_test "TARGET_AVX")