On Thu, Jul 1, 2021 at 2:17 PM liuhongt <hongtao....@intel.com> wrote: > > From: "H.J. Lu" <hjl.to...@gmail.com> > > gcc/ChangeLog: > > * config/i386/i386-expand.c > (ix86_avx256_split_vector_move_misalign): Handle V16HF mode. > * config/i386/i386.c > (ix86_preferred_simd_mode): Handle HF mode. > * config/i386/sse.md (V_256H): New mode iterator. > (avx_vextractf128<mode>): Use it. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/vect-float16-1.c: New test. > * gcc.target/i386/vect-float16-10.c: Ditto. > * gcc.target/i386/vect-float16-11.c: Ditto. > * gcc.target/i386/vect-float16-12.c: Ditto. > * gcc.target/i386/vect-float16-2.c: Ditto. > * gcc.target/i386/vect-float16-3.c: Ditto. > * gcc.target/i386/vect-float16-4.c: Ditto. > * gcc.target/i386/vect-float16-5.c: Ditto. > * gcc.target/i386/vect-float16-6.c: Ditto. > * gcc.target/i386/vect-float16-7.c: Ditto. > * gcc.target/i386/vect-float16-8.c: Ditto. > * gcc.target/i386/vect-float16-9.c: Ditto. I'm going to check in this patch with one small change: the TARGET_AVX512FP16 requirement is removed for the vector HF modes where only vpinsrw/.../vpextrw instructions are used, i.e. for V*HFmode vector_init and vector_extract{,_lo/hi}. An updated patch is attached. I'm also checking in 6 more patches, [PATCH 10/62] through [PATCH 15/62].
[PATCH 10/62] AVX512FP16: Add vaddsh/vsubsh/vmulsh/vdivsh. [PATCH 11/62] AVX512FP16: Add testcase for vaddsh/vsubsh/vmulsh/vdivsh. [PATCH 12/62] AVX512FP16: Add vmaxph/vminph/vmaxsh/vminsh. [PATCH 13/62] AVX512FP16: Add testcase for vmaxph/vmaxsh/vminph/vminsh. [PATCH 14/62] AVX512FP16: Add vcmpph/vcmpsh/vcomish/vucomish. [PATCH 15/62] AVX512FP16: Add testcase for vcmpph/vcmpsh/vcomish/vucomish. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Also newly added runtime testcases were run on sde/SPR. [10] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574128.html [11] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574127.html [12] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574129.html [13] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574130.html [14] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574131.html [15] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574132.html > --- > gcc/config/i386/i386-expand.c | 4 ++++ > gcc/config/i386/i386.c | 14 ++++++++++++++ > gcc/config/i386/sse.md | 7 ++++++- > gcc/testsuite/gcc.target/i386/vect-float16-1.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-10.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-11.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-12.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-2.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-3.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-4.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-5.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-6.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-7.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-8.c | 14 ++++++++++++++ > gcc/testsuite/gcc.target/i386/vect-float16-9.c | 14 ++++++++++++++ > 15 files changed, 192 insertions(+), 1 deletion(-) > create mode 100644 
gcc/testsuite/gcc.target/i386/vect-float16-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c > create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 39647eb2cf1..df50c72ab16 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -498,6 +498,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) > extract = gen_avx_vextractf128v32qi; > mode = V16QImode; > break; > + case E_V16HFmode: > + extract = gen_avx_vextractf128v16hf; > + mode = V8HFmode; > + break; > case E_V8SFmode: > extract = gen_avx_vextractf128v8sf; > mode = V4SFmode; > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 79e6880d9dd..dc0d440061b 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -22360,6 +22360,20 @@ ix86_preferred_simd_mode (scalar_mode mode) > else > return V2DImode; > > + case E_HFmode: > + if (TARGET_AVX512FP16) > + { > + if (TARGET_AVX512VL) > + { > + if (TARGET_PREFER_AVX128) > + return V8HFmode; > + else if (TARGET_PREFER_AVX256) > + return V16HFmode; > + } > + return V32HFmode; > + } > + return word_mode; > + > case E_SFmode: > if (TARGET_AVX512F && !TARGET_PREFER_AVX256) > return V16SFmode; > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 
2c1b6fbcd86..a0cfd611006 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -276,6 +276,11 @@ (define_mode_iterator V_128 > (define_mode_iterator V_256 > [V32QI V16HI V8SI V4DI V8SF V4DF]) > > +;; All 256bit vector modes including HF vector mode > +(define_mode_iterator V_256H > + [V32QI V16HI V8SI V4DI V8SF V4DF > + (V16HF "TARGET_AVX512F && TARGET_AVX512VL")]) > + > ;; All 128bit and 256bit vector modes > (define_mode_iterator V_128_256 > [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF]) > @@ -9045,7 +9050,7 @@ (define_expand "avx512vl_vextractf128<mode>" > > (define_expand "avx_vextractf128<mode>" > [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") > - (match_operand:V_256 1 "register_operand") > + (match_operand:V_256H 1 "register_operand") > (match_operand:SI 2 "const_0_to_1_operand")] > "TARGET_AVX" > { > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c > b/gcc/testsuite/gcc.target/i386/vect-float16-1.c > new file mode 100644 > index 00000000000..0f82cf94932 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] + c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vaddph" 8 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c > b/gcc/testsuite/gcc.target/i386/vect-float16-10.c > new file mode 100644 > index 00000000000..217645692ad > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] / c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vdivph" 8 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c > b/gcc/testsuite/gcc.target/i386/vect-float16-11.c > new file mode 100644 > index 00000000000..e0409ce9d3f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 128; i++) > + a[i] = b[i] / c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c > b/gcc/testsuite/gcc.target/i386/vect-float16-12.c > new file mode 100644 > index 00000000000..d92a25dc255 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] / c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c > b/gcc/testsuite/gcc.target/i386/vect-float16-2.c > new file mode 100644 > index 00000000000..974fca4ce09 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 128; i++) > + a[i] = b[i] + c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c > b/gcc/testsuite/gcc.target/i386/vect-float16-3.c > new file mode 100644 > index 00000000000..9bca9142df7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] + c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c > b/gcc/testsuite/gcc.target/i386/vect-float16-4.c > new file mode 100644 > index 00000000000..e6f26f0aa40 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] - c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vsubph" 8 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c > b/gcc/testsuite/gcc.target/i386/vect-float16-5.c > new file mode 100644 > index 00000000000..38f287b1dc0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 128; i++) > + a[i] = b[i] - c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c > b/gcc/testsuite/gcc.target/i386/vect-float16-6.c > new file mode 100644 > index 00000000000..bc9f7870061 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] - c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c > b/gcc/testsuite/gcc.target/i386/vect-float16-7.c > new file mode 100644 > index 00000000000..b4849cf77c7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] * c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vmulph" 8 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c > b/gcc/testsuite/gcc.target/i386/vect-float16-8.c > new file mode 100644 > index 00000000000..71631b17cc3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 128; i++) > + a[i] = b[i] * c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c > b/gcc/testsuite/gcc.target/i386/vect-float16-9.c > new file mode 100644 > index 00000000000..1be5c7f022f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ > + > +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ > + > +void > +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, > + _Float16 *__restrict__ c) > +{ > + for (int i = 0; i < 256; i++) > + a[i] = b[i] * c[i]; > +} > + > +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ > -- > 2.18.1 > -- BR, Hongtao
From 02399fddf24a2d7db60feaa8027b9cf95687024b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 27 Jan 2019 19:38:02 -0800 Subject: [PATCH 1/7] AVX512FP16: Enable _Float16 autovectorization gcc/ChangeLog: * config/i386/i386-expand.c (ix86_avx256_split_vector_move_misalign): Handle V16HF mode. * config/i386/i386.c (ix86_preferred_simd_mode): Handle HF mode. * config/i386/sse.md (V_256H): New mode iterator. (avx_vextractf128<mode>): Use it. (VEC_INIT_MODE): Align vector HFmode condition to vector HImodes since there're no real HF instruction used. (VEC_INIT_HALF_MODE): Ditto. (VIHF): Ditto. (VIHF_AVX512BW): Ditto. (*vec_extracthf): Ditto. (VEC_EXTRACT_MODE): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/vect-float16-1.c: New test. * gcc.target/i386/vect-float16-10.c: Ditto. * gcc.target/i386/vect-float16-11.c: Ditto. * gcc.target/i386/vect-float16-12.c: Ditto. * gcc.target/i386/vect-float16-2.c: Ditto. * gcc.target/i386/vect-float16-3.c: Ditto. * gcc.target/i386/vect-float16-4.c: Ditto. * gcc.target/i386/vect-float16-5.c: Ditto. * gcc.target/i386/vect-float16-6.c: Ditto. * gcc.target/i386/vect-float16-7.c: Ditto. * gcc.target/i386/vect-float16-8.c: Ditto. * gcc.target/i386/vect-float16-9.c: Ditto. 
--- gcc/config/i386/i386-expand.c | 4 ++++ gcc/config/i386/i386.c | 14 +++++++++++ gcc/config/i386/sse.md | 24 +++++++++---------- .../gcc.target/i386/vect-float16-1.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-10.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-11.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-12.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-2.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-3.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-4.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-5.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-6.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-7.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-8.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-9.c | 14 +++++++++++ 15 files changed, 198 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 0c1aec585fe..cac8354a067 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) extract = gen_avx_vextractf128v32qi; mode = V16QImode; break; + case E_V16HFmode: + extract = 
gen_avx_vextractf128v16hf; + mode = V8HFmode; + break; case E_V8SFmode: extract = gen_avx_vextractf128v8sf; mode = V4SFmode; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dc649f96d0d..7b173bc0beb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode) else return V2DImode; + case E_HFmode: + if (TARGET_AVX512FP16) + { + if (TARGET_AVX512VL) + { + if (TARGET_PREFER_AVX128) + return V8HFmode; + else if (TARGET_PREFER_AVX256) + return V16HFmode; + } + return V32HFmode; + } + return word_mode; + case E_SFmode: if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SFmode; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 06339163bc5..26024609e2b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -279,6 +279,10 @@ (define_mode_iterator V_128 (define_mode_iterator V_256 [V32QI V16HI V8SI V4DI V8SF V4DF]) +;; All 256bit vector modes including HF vector mode +(define_mode_iterator V_256H + [V32QI V16HI V8SI V4DI V8SF V4DF V16HF]) + ;; All 128bit and 256bit vector modes (define_mode_iterator V_128_256 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF]) @@ -406,8 +410,7 @@ (define_mode_iterator VIHF (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16")]) + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF]) (define_mode_iterator VI_AVX2 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI @@ -752,7 +755,7 @@ (define_mode_iterator VI_AVX512BW [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) (define_mode_iterator VIHF_AVX512BW [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW") - (V32HF "TARGET_AVX512FP16")]) + (V32HF "TARGET_AVX512BW")]) ;; Int-float size matches (define_mode_iterator VI4F_128 [V4SI V4SF]) @@ -9381,7 +9384,7 @@ (define_expand 
"avx512vl_vextractf128<mode>" (define_expand "avx_vextractf128<mode>" [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") - (match_operand:V_256 1 "register_operand") + (match_operand:V_256H 1 "register_operand") (match_operand:SI 2 "const_0_to_1_operand")] "TARGET_AVX" { @@ -9868,7 +9871,7 @@ (define_insn "*vec_extracthf" (match_operand:V8HF 1 "register_operand" "v,v") (parallel [(match_operand:SI 2 "const_0_to_7_operand")])))] - "TARGET_AVX512FP16" + "TARGET_SSE2" "@ vpextrw\t{%2, %1, %k0|%k0, %1, %2} vpextrw\t{%2, %1, %0|%0, %1, %2}" @@ -9882,8 +9885,7 @@ (define_mode_iterator VEC_EXTRACT_MODE (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) @@ -15615,7 +15617,7 @@ (define_expand "vec_interleave_low<mode>" ;; Modes handled by pinsr patterns. 
(define_mode_iterator PINSR_MODE - [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16") + [(V16QI "TARGET_SSE4_1") V8HI V8HF (V4SI "TARGET_SSE4_1") (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) @@ -23723,8 +23725,7 @@ (define_mode_iterator VEC_INIT_MODE (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) @@ -23736,8 +23737,7 @@ (define_mode_iterator VEC_INIT_HALF_MODE (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V4TI "TARGET_AVX512F")]) diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c new file mode 100644 index 00000000000..0f82cf94932 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c new file mode 100644 index 00000000000..217645692ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c new file mode 100644 index 00000000000..e0409ce9d3f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c new file mode 100644 index 00000000000..d92a25dc255 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c new file mode 100644 index 00000000000..974fca4ce09 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c new file mode 100644 index 00000000000..9bca9142df7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c new file mode 100644 index 00000000000..e6f26f0aa40 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c new file mode 100644 index 00000000000..38f287b1dc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c new file mode 100644 index 00000000000..bc9f7870061 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c new file mode 100644 index 00000000000..b4849cf77c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c new file mode 100644 index 00000000000..71631b17cc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c new file mode 100644 index 00000000000..1be5c7f022f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ -- 2.27.0
From 02399fddf24a2d7db60feaa8027b9cf95687024b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 27 Jan 2019 19:38:02 -0800 Subject: [PATCH 1/7] AVX512FP16: Enable _Float16 autovectorization gcc/ChangeLog: * config/i386/i386-expand.c (ix86_avx256_split_vector_move_misalign): Handle V16HF mode. * config/i386/i386.c (ix86_preferred_simd_mode): Handle HF mode. * config/i386/sse.md (V_256H): New mode iterator. (avx_vextractf128<mode>): Use it. (VEC_INIT_MODE): Align vector HFmode condition to vector HImodes since there're no real HF instruction used. (VEC_INIT_HALF_MODE): Ditto. (VIHF): Ditto. (VIHF_AVX512BW): Ditto. (*vec_extracthf): Ditto. (VEC_EXTRACT_MODE): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/vect-float16-1.c: New test. * gcc.target/i386/vect-float16-10.c: Ditto. * gcc.target/i386/vect-float16-11.c: Ditto. * gcc.target/i386/vect-float16-12.c: Ditto. * gcc.target/i386/vect-float16-2.c: Ditto. * gcc.target/i386/vect-float16-3.c: Ditto. * gcc.target/i386/vect-float16-4.c: Ditto. * gcc.target/i386/vect-float16-5.c: Ditto. * gcc.target/i386/vect-float16-6.c: Ditto. * gcc.target/i386/vect-float16-7.c: Ditto. * gcc.target/i386/vect-float16-8.c: Ditto. * gcc.target/i386/vect-float16-9.c: Ditto. 
--- gcc/config/i386/i386-expand.c | 4 ++++ gcc/config/i386/i386.c | 14 +++++++++++ gcc/config/i386/sse.md | 24 +++++++++---------- .../gcc.target/i386/vect-float16-1.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-10.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-11.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-12.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-2.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-3.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-4.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-5.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-6.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-7.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-8.c | 14 +++++++++++ .../gcc.target/i386/vect-float16-9.c | 14 +++++++++++ 15 files changed, 198 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 0c1aec585fe..cac8354a067 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) extract = gen_avx_vextractf128v32qi; mode = V16QImode; break; + case E_V16HFmode: + extract = 
gen_avx_vextractf128v16hf; + mode = V8HFmode; + break; case E_V8SFmode: extract = gen_avx_vextractf128v8sf; mode = V4SFmode; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dc649f96d0d..7b173bc0beb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode) else return V2DImode; + case E_HFmode: + if (TARGET_AVX512FP16) + { + if (TARGET_AVX512VL) + { + if (TARGET_PREFER_AVX128) + return V8HFmode; + else if (TARGET_PREFER_AVX256) + return V16HFmode; + } + return V32HFmode; + } + return word_mode; + case E_SFmode: if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SFmode; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 06339163bc5..26024609e2b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -279,6 +279,10 @@ (define_mode_iterator V_128 (define_mode_iterator V_256 [V32QI V16HI V8SI V4DI V8SF V4DF]) +;; All 256bit vector modes including HF vector mode +(define_mode_iterator V_256H + [V32QI V16HI V8SI V4DI V8SF V4DF V16HF]) + ;; All 128bit and 256bit vector modes (define_mode_iterator V_128_256 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF]) @@ -406,8 +410,7 @@ (define_mode_iterator VIHF (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16")]) + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF]) (define_mode_iterator VI_AVX2 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI @@ -752,7 +755,7 @@ (define_mode_iterator VI_AVX512BW [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) (define_mode_iterator VIHF_AVX512BW [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW") - (V32HF "TARGET_AVX512FP16")]) + (V32HF "TARGET_AVX512BW")]) ;; Int-float size matches (define_mode_iterator VI4F_128 [V4SI V4SF]) @@ -9381,7 +9384,7 @@ (define_expand 
"avx512vl_vextractf128<mode>" (define_expand "avx_vextractf128<mode>" [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") - (match_operand:V_256 1 "register_operand") + (match_operand:V_256H 1 "register_operand") (match_operand:SI 2 "const_0_to_1_operand")] "TARGET_AVX" { @@ -9868,7 +9871,7 @@ (define_insn "*vec_extracthf" (match_operand:V8HF 1 "register_operand" "v,v") (parallel [(match_operand:SI 2 "const_0_to_7_operand")])))] - "TARGET_AVX512FP16" + "TARGET_SSE2" "@ vpextrw\t{%2, %1, %k0|%k0, %1, %2} vpextrw\t{%2, %1, %0|%0, %1, %2}" @@ -9882,8 +9885,7 @@ (define_mode_iterator VEC_EXTRACT_MODE (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) @@ -15615,7 +15617,7 @@ (define_expand "vec_interleave_low<mode>" ;; Modes handled by pinsr patterns. 
(define_mode_iterator PINSR_MODE - [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16") + [(V16QI "TARGET_SSE4_1") V8HI V8HF (V4SI "TARGET_SSE4_1") (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) @@ -23723,8 +23725,7 @@ (define_mode_iterator VEC_INIT_MODE (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) @@ -23736,8 +23737,7 @@ (define_mode_iterator VEC_INIT_HALF_MODE (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V4TI "TARGET_AVX512F")]) diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c new file mode 100644 index 00000000000..0f82cf94932 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 512-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c new file mode 100644 index 00000000000..217645692ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 512-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c new file mode 100644 index 00000000000..e0409ce9d3f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c new file mode 100644 index 00000000000..d92a25dc255 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 256-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] / c[i]; +} + +/* { dg-final { scan-assembler-times "vdivph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c new file mode 100644 index 00000000000..974fca4ce09 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c new file mode 100644 index 00000000000..9bca9142df7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 256-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times "vaddph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c new file mode 100644 index 00000000000..e6f26f0aa40 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 512-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c new file mode 100644 index 00000000000..38f287b1dc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c new file mode 100644 index 00000000000..bc9f7870061 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 256-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] - c[i]; +} + +/* { dg-final { scan-assembler-times "vsubph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c new file mode 100644 index 00000000000..b4849cf77c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */ + +/* Check that we vectorize to a full 512-bit vector for _Float16 types. 
*/ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c new file mode 100644 index 00000000000..71631b17cc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */ + +/* Check that we vectorize to a full 128-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 128; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c new file mode 100644 index 00000000000..1be5c7f022f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */ + +/* Check that we vectorize to a full 256-bit vector for _Float16 types. */ + +void +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b, + _Float16 *__restrict__ c) +{ + for (int i = 0; i < 256; i++) + a[i] = b[i] * c[i]; +} + +/* { dg-final { scan-assembler-times "vmulph" 16 } } */ -- 2.27.0