Implement FP vector compares for V2SFmode for TARGET_MMX_WITH_SSE. 2021-05-12 Uroš Bizjak <ubiz...@gmail.com>
gcc/ PR target/98218 * config/i386/i386-expand.c (ix86_expand_sse_movcc): Handle V2SF mode. * config/i386/mmx.md (MMXMODE124): New mode iterator. (V2FI): Ditto. (mmxintvecmode): New mode attribute. (mmxintvecmodelower): Ditto. (*mmx_maskcmpv2sf3_comm): New insn pattern. (*mmx_maskcmpv2sf3): Ditto. (vec_cmpv2sfv2si): New expander. (vcond<V2FI:mode>v2sf): Ditto. (mmx_blendvps): New insn pattern. (vcond<MMXMODE124:mode><MMXMODEI:mode>): Also handle V2SFmode. (vcondu<MMXMODE124:mode><MMXMODEI:mode>): Ditto. (vcond_mask_<mode><mmxintvecmodelower>): Ditto. gcc/testsuite/ PR target/98218 * g++.target/i386/pr98218-1.C: New test. * gcc.target/i386/pr98218-4.c: Ditto. * gcc.target/i386/pr98218-1.c: Correct PR number. * gcc.target/i386/pr98218-1a.c: Ditto. * gcc.target/i386/pr98218-2.c: Ditto. * gcc.target/i386/pr98218-2a.c: Ditto. * gcc.target/i386/pr98218-3.c: Ditto. * gcc.target/i386/pr98218-3a.c: Ditto. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to master. Uros.
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 5cfde5b3d30..dd230081b16 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3680,6 +3680,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) switch (mode) { + case E_V2SFmode: + if (TARGET_SSE4_1) + { + gen = gen_mmx_blendvps; + op_true = force_reg (mode, op_true); + } + break; case E_V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index f08570856f9..d433c524652 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -49,6 +49,7 @@ (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")]) ;; All 8-byte vector modes handled by MMX (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) +(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF]) ;; Mix-n-match (define_mode_iterator MMXMODE12 [V8QI V4HI]) @@ -56,12 +57,22 @@ (define_mode_iterator MMXMODE14 [V8QI V2SI]) (define_mode_iterator MMXMODE24 [V4HI V2SI]) (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) +;; All V2S* modes +(define_mode_iterator V2FI [V2SF V2SI]) + ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")]) (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +;; Mapping of vector float modes to an integer mode of the same size +(define_mode_attr mmxintvecmode + [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")]) + +(define_mode_attr mmxintvecmodelower + [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")]) + (define_mode_attr Yv_Yw [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) @@ -714,6 +725,85 @@ (define_insn "mmx_gev2sf3" (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) +(define_insn "*mmx_maskcmpv2sf3_comm" + [(set (match_operand:V2SF 0 "register_operand" "=x,x") + (match_operator:V2SF 3 "sse_comparison_operator" + [(match_operand:V2SF 1 
"register_operand" "%0,x") + (match_operand:V2SF 2 "register_operand" "x,x")]))] + "TARGET_MMX_WITH_SSE + && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" + "@ + cmp%D3ps\t{%2, %0|%0, %2} + vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V4SF")]) + +(define_insn "*mmx_maskcmpv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=x,x") + (match_operator:V2SF 3 "sse_comparison_operator" + [(match_operand:V2SF 1 "register_operand" "0,x") + (match_operand:V2SF 2 "register_operand" "x,x")]))] + "TARGET_MMX_WITH_SSE" + "@ + cmp%D3ps\t{%2, %0|%0, %2} + vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V4SF")]) + +(define_expand "vec_cmpv2sfv2si" + [(set (match_operand:V2SI 0 "register_operand") + (match_operator:V2SI 1 "" + [(match_operand:V2SF 2 "register_operand") + (match_operand:V2SF 3 "register_operand")]))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_fp_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcond<mode>v2sf" + [(set (match_operand:V2FI 0 "register_operand") + (if_then_else:V2FI + (match_operator 3 "" + [(match_operand:V2SF 4 "register_operand") + (match_operand:V2SF 5 "register_operand")]) + (match_operand:V2FI 1) + (match_operand:V2FI 2)))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_fp_vcond (operands); + gcc_assert (ok); + DONE; +}) + +(define_insn "mmx_blendvps" + [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x") + (unspec:V2SF + [(match_operand:V2SF 1 "register_operand" "0,0,x") + (match_operand:V2SF 2 "register_operand" "Yr,*x,x") + (match_operand:V2SF 3 "register_operand" "Yz,Yz,x")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "@ + blendvps\t{%3, %2, %0|%0, %2, %3} + blendvps\t{%3, %2, %0|%0, %2, %3} + 
vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector") + (set_attr "mode" "V4SF")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point logical operations @@ -1657,42 +1747,46 @@ (define_expand "vec_cmpu<mode><mode>" DONE; }) -(define_expand "vcond<mode><mode>" - [(set (match_operand:MMXMODEI 0 "register_operand") - (if_then_else:MMXMODEI +(define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>" + [(set (match_operand:MMXMODE124 0 "register_operand") + (if_then_else:MMXMODE124 (match_operator 3 "" [(match_operand:MMXMODEI 4 "register_operand") (match_operand:MMXMODEI 5 "register_operand")]) - (match_operand:MMXMODEI 1) - (match_operand:MMXMODEI 2)))] - "TARGET_MMX_WITH_SSE" + (match_operand:MMXMODE124 1) + (match_operand:MMXMODE124 2)))] + "TARGET_MMX_WITH_SSE + && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode) + == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))" { bool ok = ix86_expand_int_vcond (operands); gcc_assert (ok); DONE; }) -(define_expand "vcondu<mode><mode>" - [(set (match_operand:MMXMODEI 0 "register_operand") - (if_then_else:MMXMODEI +(define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>" + [(set (match_operand:MMXMODE124 0 "register_operand") + (if_then_else:MMXMODE124 (match_operator 3 "" [(match_operand:MMXMODEI 4 "register_operand") (match_operand:MMXMODEI 5 "register_operand")]) - (match_operand:MMXMODEI 1) - (match_operand:MMXMODEI 2)))] - "TARGET_MMX_WITH_SSE" + (match_operand:MMXMODE124 1) + (match_operand:MMXMODE124 2)))] + "TARGET_MMX_WITH_SSE + && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode) + == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))" { bool ok = ix86_expand_int_vcond (operands); gcc_assert (ok); DONE; }) -(define_expand "vcond_mask_<mode><mode>" - [(set 
(match_operand:MMXMODEI 0 "register_operand") - (vec_merge:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand") - (match_operand:MMXMODEI 2 "register_operand") - (match_operand:MMXMODEI 3 "register_operand")))] +(define_expand "vcond_mask_<mode><mmxintvecmodelower>" + [(set (match_operand:MMXMODE124 0 "register_operand") + (vec_merge:MMXMODE124 + (match_operand:MMXMODE124 1 "register_operand") + (match_operand:MMXMODE124 2 "register_operand") + (match_operand:<mmxintvecmode> 3 "register_operand")))] "TARGET_MMX_WITH_SSE" { ix86_expand_sse_movcc (operands[0], operands[3], diff --git a/gcc/testsuite/g++.target/i386/pr98218-1.C b/gcc/testsuite/g++.target/i386/pr98218-1.C new file mode 100644 index 00000000000..61ea4bf9008 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr98218-1.C @@ -0,0 +1,20 @@ +/* PR target/98218 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -msse2" } */ + +typedef unsigned int __attribute__((__vector_size__ (8))) v64u32; +typedef int __attribute__((__vector_size__ (8))) v64s32; +typedef float __attribute__((__vector_size__ (8))) v64f32; + +v64u32 au, bu; +v64s32 as, bs; +v64f32 af, bf; + +v64u32 tu (v64f32 a, v64f32 b) { return (a > b) ? au : bu; } +v64s32 ts (v64f32 a, v64f32 b) { return (a > b) ? as : bs; } +v64f32 fu (v64u32 a, v64u32 b) { return (a > b) ? af : bf; } +v64f32 fs (v64s32 a, v64s32 b) { return (a > b) ? af : bf; } +v64f32 ff (v64f32 a, v64f32 b) { return (a > b) ? af : bf; } + +/* { dg-final { scan-assembler-times "cmpltps" 3 } } */ +/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1.c b/gcc/testsuite/gcc.target/i386/pr98218-1.c index 48407dabc2a..9d6602c08a2 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-1.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-1.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1a.c b/gcc/testsuite/gcc.target/i386/pr98218-1a.c index 3470c87cdc3..2610438b24a 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-1a.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-1a.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -ftree-vectorize -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2.c b/gcc/testsuite/gcc.target/i386/pr98218-2.c index 0b716126413..948bf4f5978 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-2.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-2.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2a.c b/gcc/testsuite/gcc.target/i386/pr98218-2a.c index 6afd0a412d7..73c7226044f 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-2a.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-2a.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -ftree-vectorize -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3.c b/gcc/testsuite/gcc.target/i386/pr98218-3.c index 83a8c298640..1b40d0cee36 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-3.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-3.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3a.c b/gcc/testsuite/gcc.target/i386/pr98218-3a.c index 272d54e5b34..cf1d4972807 100644 --- a/gcc/testsuite/gcc.target/i386/pr98218-3a.c +++ b/gcc/testsuite/gcc.target/i386/pr98218-3a.c @@ -1,4 +1,4 @@ -/* PR target/98522 */ +/* PR target/98218 */ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -ftree-vectorize -msse2" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-4.c b/gcc/testsuite/gcc.target/i386/pr98218-4.c new file mode 100644 index 00000000000..647bdb1171b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-4.c @@ -0,0 +1,16 @@ +/* PR target/98218 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -msse2" } */ + +typedef unsigned int __attribute__((__vector_size__ (8))) v64u32; +typedef int __attribute__((__vector_size__ (8))) v64s32; +typedef float __attribute__((__vector_size__ (8))) v64f32; + +v64u32 tu (v64f32 a, v64f32 b) { return a > b; } +v64s32 ts (v64f32 a, v64f32 b) { return a > b; } +v64f32 fu (v64u32 a, v64u32 b) { return a > b; } +v64f32 fs (v64s32 a, v64s32 b) { return a > b; } +v64f32 ff (v64f32 a, v64f32 b) { return a > b; } + +/* { dg-final { scan-assembler-times "cmpltps" 3 } } */ +/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */