On Mon, Feb 4, 2019 at 12:04 PM Richard Biener <richard.guent...@gmail.com> wrote:
>
> On Mon, Feb 4, 2019 at 10:10 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > > implement MMX intrinsics with SSE instructions.  To support it, we disable
> > > MMX by default in 64-bit mode so that MMX registers won't be available
> > > with x86-64.  Most MMX instructions have equivalent SSE versions, and the
> > > results of some SSE versions need to be reshuffled into the right order
> > > for MMX.  There are a couple of tricky cases:
> >
> > I don't think we have to disable MMX registers, but we have to tune
> > register allocation preferences to not allocate an MMX register unless
> > really necessary.  In practice, this means changing "y" constraints to
> > "*y" when TARGET_MMX_WITH_SSE is active (probably using the "enabled"
> > attribute).  This would also solve the problem with assembler clobbers
> > that Andi exposed.
>
> But is "unless really necessary" good enough to never get it wrong
> under any circumstance?  I actually like HJ's patch (I haven't looked
> at the details though).  I'd have gone a more aggressive way of simply
> defaulting to -mno-mmx, without any emulation or whatnot, though.
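[Illustrative sketch, not from the thread or the patch: the conversion H.J. describes amounts to roughly the following at the source level, using standard SSE2 intrinsics.  The helper name add_pi8_via_sse is hypothetical; the actual patch does the equivalent inside the compiler, not in user code.

#include <mmintrin.h>
#include <emmintrin.h>

/* Sketch only: the MMX paddb (_mm_add_pi8) performed with the SSE2 paddb.
   On x86-64, __m64 values are already passed and returned in XMM
   registers, so widening to 128 bits and taking the low 64 bits of the
   result is cheap.  */
static inline __m64
add_pi8_via_sse (__m64 a, __m64 b)
{
  __m128i x = _mm_movpi64_epi64 (a);   /* __m64 -> low half of an XMM reg */
  __m128i y = _mm_movpi64_epi64 (b);
  return _mm_movepi64_pi64 (_mm_add_epi8 (x, y));  /* low 64 bits of paddb */
}
]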
Please see the attached *prototype* patch that enables vectorization of

void
foo (char *restrict r, char *restrict a, char *restrict b)
{
  int i;

  for (i = 0; i < 8; i++)
    r[i] = a[i] + b[i];
}

with and without -mmmx.  The pattern is defined as:

(define_insn "*mmx_<plusminus_insn><mode>3"
  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
        (plusminus:MMXMODEI8
          (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
          (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "mmxadd")
   (set_attr "mode" "DI")
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "TARGET_MMX_WITH_SSE")
            (eq_attr "alternative" "2")
              (symbol_ref "TARGET_AVX && TARGET_MMX_WITH_SSE")
           ]
           (symbol_ref ("!TARGET_MMX_WITH_SSE"))))])

so there is no way an MMX register gets allocated when TARGET_MMX_WITH_SSE
is active.

We have had MMX registers enabled in the move insns for years, and there
were no problems with the current register allocation preferences.  So I'm
pretty confident that the above is enough to prevent unwanted MMX moves
while still allowing MMX registers.

With the above approach, we can enable TARGET_MMX_WITH_SSE unconditionally
for 64-bit SSE2 targets, since we will still allow MMX regs.

Please note that there is no requirement to use MMX instructions for MMX
intrinsics, so we can emit _all_ MMX intrinsics using HJ's conversion
unconditionally.

Uros.
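[Illustrative note: with the new standard-named <plusminus_insn><mode>3 expander and the 8-byte entry pushed in ix86_autovectorize_vector_sizes, a plain GNU-vector-extension add such as the hypothetical testcase below should take the same path as foo above, never touching %mm registers when TARGET_MMX_WITH_SSE is in effect.

/* Hypothetical testcase, not part of the patch: an 8-byte V8QI add.  */
typedef char v8qi __attribute__ ((vector_size (8)));

v8qi
add_v8qi (v8qi a, v8qi b)
{
  return a + b;
}
]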
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e67abe87646..3bf7d33f840d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -44052,7 +44052,8 @@ ix86_vector_mode_supported_p (machine_mode mode)
 {
   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
     return true;
-  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+  if ((TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+      || (TARGET_MMX_WITH_SSE && VALID_MMX_REG_MODE (mode)))
     return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
     return true;
@@ -50050,6 +50051,9 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
       sizes->safe_push (32);
       sizes->safe_push (16);
     }
+
+  if (TARGET_MMX_WITH_SSE)
+    sizes->safe_push (8);
 }
 
 /* Implemenation of targetm.vectorize.get_mask_mode. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5d..3c9e77ba7c2e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,8 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
 
+#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
+
 extern unsigned char x86_prefetch_sse;
 
 #define TARGET_PREFETCH_SSE x86_prefetch_sse
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e6..304f711d2b27 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -70,7 +70,7 @@
 (define_expand "mov<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
         (match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
@@ -81,7 +81,7 @@
     "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
         (match_operand:MMXMODE 1 "nonimm_or_0_operand"
     "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -690,19 +690,37 @@
         (plusminus:MMXMODEI8
           (match_operand:MMXMODEI8 1 "nonimmediate_operand")
           (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+  "TARGET_MMX"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+  [(set (match_operand:MMXMODEI8 0 "register_operand")
+        (plusminus:MMXMODEI8
+          (match_operand:MMXMODEI8 1 "nonimmediate_operand")
+          (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
         (plusminus:MMXMODEI8
-          (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
-          (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+          (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
+          (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI")
+   (set (attr "enabled")
+     (cond [(eq_attr "alternative" "1")
+              (symbol_ref "TARGET_MMX_WITH_SSE")
+            (eq_attr "alternative" "2")
+              (symbol_ref "TARGET_AVX && TARGET_MMX_WITH_SSE")
+           ]
+           (symbol_ref ("!TARGET_MMX_WITH_SSE"))))])
 
 (define_expand "mmx_<plusminus_insn><mode>3"
   [(set (match_operand:MMXMODE12 0 "register_operand")