On 2/10/19, H.J. Lu <hjl.to...@gmail.com> wrote: > In 64-bit mode, implement V2SF add/sub/mul with SSE. Only SSE register > source operand is allowed. > > gcc/ > > PR target/89028 > * config/i386/i386.md (comm): Handle mult. > * config/i386/mmx.md (plusminusmult): New. > (plusminusmult_insn): Likewise. > (plusminusmult_mnemonic): Likewise. > (plusminusmult_type): Likewise. > (mmx_addv2sf3): Add "&& !TARGET_MMX_WITH_SSE". > (*mmx_addv2sf3): Likewise. > (mmx_subv2sf3): Likewise. > (mmx_subrv2sf3): Likewise. > (*mmx_subv2sf3): Likewise. > (mmx_mulv2sf3): Likewise. > (*mmx_mulv2sf3): Likewise. > (<plusminusmult_insn>v2sf3): New. > (*sse_<plusminusmult_insn>v2sf3): Likewise.
No. There is no native support for V2SF in SSE, so we'll leave these out. Uros. > > gcc/testsuite/ > > PR target/89028 > * gcc.target/i386/pr89028-2.c: New test. > * gcc.target/i386/pr89028-3.c: Likewise. > * gcc.target/i386/pr89028-4.c: Likewise. > * gcc.target/i386/pr89028-5.c: Likewise. > * gcc.target/i386/pr89028-6.c: Likewise. > * gcc.target/i386/pr89028-7.c: Likewise. > --- > gcc/config/i386/i386.md | 3 +- > gcc/config/i386/mmx.md | 56 ++++++++++++++++++++--- > gcc/testsuite/gcc.target/i386/pr89028-2.c | 11 +++++ > gcc/testsuite/gcc.target/i386/pr89028-3.c | 14 ++++++ > gcc/testsuite/gcc.target/i386/pr89028-4.c | 14 ++++++ > gcc/testsuite/gcc.target/i386/pr89028-5.c | 11 +++++ > gcc/testsuite/gcc.target/i386/pr89028-6.c | 14 ++++++ > gcc/testsuite/gcc.target/i386/pr89028-7.c | 14 ++++++ > 8 files changed, 129 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 72685107fc0..cda973c0fbf 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -873,7 +873,8 @@ > > ;; Mark commutative operators as such in constraints. > (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") > - (minus "") (ss_minus "") (us_minus "")]) > + (minus "") (ss_minus "") (us_minus "") > + (mult "%")]) > > ;; Mapping of max and min > (define_code_iterator maxmin [smax smin umax umin]) > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index e56d2e71168..88c1ecd9ae6 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -63,6 +63,20 @@ > ;; Instruction suffix for truncations with saturation. 
> (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")]) > > +(define_code_iterator plusminusmult [plus minus mult]) > + > +;; Base name for define_insn > +(define_code_attr plusminusmult_insn > + [(plus "add") (minus "sub") (mult "mul")]) > + > +;; Base name for insn mnemonic. > +(define_code_attr plusminusmult_mnemonic > + [(plus "add") (minus "sub") (mult "mul")]) > + > +;; Insn type name for insn mnemonic. > +(define_code_attr plusminusmult_type > + [(plus "add") (minus "add") (mult "mul")]) > + > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > ;; > ;; Move patterns > @@ -279,14 +293,16 @@ > (plus:V2SF > (match_operand:V2SF 1 "nonimmediate_operand") > (match_operand:V2SF 2 "nonimmediate_operand")))] > - "TARGET_3DNOW" > + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE" > "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);") > > (define_insn "*mmx_addv2sf3" > [(set (match_operand:V2SF 0 "register_operand" "=y") > (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") > (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] > - "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)" > + "TARGET_3DNOW > + && !TARGET_MMX_WITH_SSE > + && ix86_binary_operator_ok (PLUS, V2SFmode, operands)" > "pfadd\t{%2, %0|%0, %2}" > [(set_attr "type" "mmxadd") > (set_attr "prefix_extra" "1") > @@ -296,19 +312,21 @@ > [(set (match_operand:V2SF 0 "register_operand") > (minus:V2SF (match_operand:V2SF 1 "register_operand") > (match_operand:V2SF 2 "nonimmediate_operand")))] > - "TARGET_3DNOW") > + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE") > > (define_expand "mmx_subrv2sf3" > [(set (match_operand:V2SF 0 "register_operand") > (minus:V2SF (match_operand:V2SF 2 "register_operand") > (match_operand:V2SF 1 "nonimmediate_operand")))] > - "TARGET_3DNOW") > + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE") > > (define_insn "*mmx_subv2sf3" > [(set (match_operand:V2SF 0 "register_operand" "=y,y") > (minus:V2SF (match_operand:V2SF 1 
"nonimmediate_operand" "0,ym") > (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))] > - "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > + "TARGET_3DNOW > + && !TARGET_MMX_WITH_SSE > + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" > "@ > pfsub\t{%2, %0|%0, %2} > pfsubr\t{%1, %0|%0, %1}" > @@ -320,19 +338,43 @@ > [(set (match_operand:V2SF 0 "register_operand") > (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand") > (match_operand:V2SF 2 "nonimmediate_operand")))] > - "TARGET_3DNOW" > + "TARGET_3DNOW && !TARGET_MMX_WITH_SSE" > "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);") > > (define_insn "*mmx_mulv2sf3" > [(set (match_operand:V2SF 0 "register_operand" "=y") > (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") > (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] > - "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)" > + "TARGET_3DNOW > + && !TARGET_MMX_WITH_SSE > + && ix86_binary_operator_ok (MULT, V2SFmode, operands)" > "pfmul\t{%2, %0|%0, %2}" > [(set_attr "type" "mmxmul") > (set_attr "prefix_extra" "1") > (set_attr "mode" "V2SF")]) > > +(define_expand "<plusminusmult_insn>v2sf3" > + [(set (match_operand:V2SF 0 "register_operand") > + (plusminusmult:V2SF > + (match_operand:V2SF 1 "nonimmediate_operand") > + (match_operand:V2SF 2 "nonimmediate_operand")))] > + "TARGET_MMX_WITH_SSE" > + "ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);") > + > +(define_insn "*sse_<plusminusmult_insn>v2sf3" > + [(set (match_operand:V2SF 0 "register_operand" "=x,Yv") > + (plusminusmult:V2SF > + (match_operand:V2SF 1 "nonimmediate_operand" "<comm>0,Yv") > + (match_operand:V2SF 2 "nonimmediate_operand" "x,Yv")))] > + "TARGET_MMX_WITH_SSE > + && ix86_binary_operator_ok (<CODE>, V4SFmode, operands)" > + "@ > + <plusminusmult_mnemonic>ps\t{%2, %0|%0, %2} > + v<plusminusmult_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "isa" "noavx,avx") > + (set_attr "type" "sse<plusminusmult_type>") > 
+ (set_attr "mode" "V4SF")]) > + > (define_expand "mmx_<code>v2sf3" > [(set (match_operand:V2SF 0 "register_operand") > (smaxmin:V2SF > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-2.c > b/gcc/testsuite/gcc.target/i386/pr89028-2.c > new file mode 100644 > index 00000000000..d096b0b6863 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "addps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +__v2sf > +foo1 (__v2sf x, __v2sf y) > +{ > + return x + y; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-3.c > b/gcc/testsuite/gcc.target/i386/pr89028-3.c > new file mode 100644 > index 00000000000..0fa187aaf72 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-3.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "movlps" 2 } } */ > +/* { dg-final { scan-assembler-times "addps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +extern __v2sf x, y, z; > + > +__v2sf > +foo2 (void) > +{ > + return x + y; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-4.c > b/gcc/testsuite/gcc.target/i386/pr89028-4.c > new file mode 100644 > index 00000000000..b25f67632cb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-4.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "movlps" 1 } } */ > +/* { dg-final { scan-assembler-times "addps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +extern __v2sf x, y, z; > + > +void > +foo3 (__v2sf x, __v2sf y) > +{ > + z = x + y; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-5.c > b/gcc/testsuite/gcc.target/i386/pr89028-5.c > new file mode 100644 > index 00000000000..4ead7187605 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-5.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "mulps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +__v2sf > +foo1 (__v2sf x, __v2sf y) > +{ > + return x * y; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-6.c > b/gcc/testsuite/gcc.target/i386/pr89028-6.c > new file mode 100644 > index 00000000000..9277c848c6c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-6.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "movlps" 2 } } */ > +/* { dg-final { scan-assembler-times "mulps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +extern __v2sf x, y, z; > + > +__v2sf > +foo2 (void) > +{ > + return x * y; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89028-7.c > b/gcc/testsuite/gcc.target/i386/pr89028-7.c > new file mode 100644 > index 00000000000..c8af7b2a4e9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89028-7.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -msse2 -mno-mmx" } */ > +/* { dg-final { scan-assembler-times "movlps" 1 } } */ > +/* { dg-final { scan-assembler-times "mulps" 1 } } */ > + > +typedef float __v2sf __attribute__ ((__vector_size__ (8))); > + > +extern __v2sf x, y, z; > + > +void > +foo3 (__v2sf x, __v2sf y) > +{ > + z = x * y; > +} > -- > 2.20.1 > >