Re: [PATCH 09/62] AVX512FP16: Enable _Float16 autovectorization

Hongtao Liu via Gcc-patches Thu, 09 Sep 2021 23:58:28 -0700

On Thu, Jul 1, 2021 at 2:17 PM liuhongt <hongtao....@intel.com> wrote:
>
> From: "H.J. Lu" <hjl.to...@gmail.com>
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.c
>         (ix86_avx256_split_vector_move_misalign): Handle V16HF mode.
>         * config/i386/i386.c
>         (ix86_preferred_simd_mode): Handle HF mode.
>         * config/i386/sse.md (V_256H): New mode iterator.
>         (avx_vextractf128<mode>): Use it.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/vect-float16-1.c: New test.
>         * gcc.target/i386/vect-float16-10.c: Ditto.
>         * gcc.target/i386/vect-float16-11.c: Ditto.
>         * gcc.target/i386/vect-float16-12.c: Ditto.
>         * gcc.target/i386/vect-float16-2.c: Ditto.
>         * gcc.target/i386/vect-float16-3.c: Ditto.
>         * gcc.target/i386/vect-float16-4.c: Ditto.
>         * gcc.target/i386/vect-float16-5.c: Ditto.
>         * gcc.target/i386/vect-float16-6.c: Ditto.
>         * gcc.target/i386/vect-float16-7.c: Ditto.
>         * gcc.target/i386/vect-float16-8.c: Ditto.
>         * gcc.target/i386/vect-float16-9.c: Ditto.
I'm going to check in this patch with a small change: the
TARGET_AVX512FP16 requirement is removed for vector HFmodes where only
vpinsrw/.../vpextrw instructions are used, i.e. for V*HFmode
vector_init and vector_extract{,_lo/hi}.
An updated patch is attached.
I'm also checking in 6 more patches, [PATCH 10/62] to [PATCH 15/62].


[PATCH 10/62] AVX512FP16: Add vaddsh/vsubsh/vmulsh/vdivsh.
[PATCH 11/62] AVX512FP16: Add testcase for vaddsh/vsubsh/vmulsh/vdivsh.
[PATCH 12/62] AVX512FP16: Add vmaxph/vminph/vmaxsh/vminsh.
[PATCH 13/62] AVX512FP16: Add testcase for vmaxph/vmaxsh/vminph/vminsh.
[PATCH 14/62] AVX512FP16: Add vcmpph/vcmpsh/vcomish/vucomish.
[PATCH 15/62] AVX512FP16: Add testcase for vcmpph/vcmpsh/vcomish/vucomish.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  The newly added runtime testcases were also run on sde/SPR.

[10] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574128.html
[11] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574127.html
[12] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574129.html
[13] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574130.html
[14] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574131.html
[15] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574132.html

> ---
>  gcc/config/i386/i386-expand.c                   |  4 ++++
>  gcc/config/i386/i386.c                          | 14 ++++++++++++++
>  gcc/config/i386/sse.md                          |  7 ++++++-
>  gcc/testsuite/gcc.target/i386/vect-float16-1.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-10.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-11.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-12.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-2.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-3.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-4.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-5.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-6.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-7.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-8.c  | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/vect-float16-9.c  | 14 ++++++++++++++
>  15 files changed, 192 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 39647eb2cf1..df50c72ab16 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -498,6 +498,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
>        extract = gen_avx_vextractf128v32qi;
>        mode = V16QImode;
>        break;
> +    case E_V16HFmode:
> +      extract = gen_avx_vextractf128v16hf;
> +      mode = V8HFmode;
> +      break;
>      case E_V8SFmode:
>        extract = gen_avx_vextractf128v8sf;
>        mode = V4SFmode;
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 79e6880d9dd..dc0d440061b 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -22360,6 +22360,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
>        else
>         return V2DImode;
>
> +    case E_HFmode:
> +      if (TARGET_AVX512FP16)
> +       {
> +         if (TARGET_AVX512VL)
> +           {
> +             if (TARGET_PREFER_AVX128)
> +               return V8HFmode;
> +             else if (TARGET_PREFER_AVX256)
> +               return V16HFmode;
> +           }
> +         return V32HFmode;
> +       }
> +      return word_mode;
> +
>      case E_SFmode:
>        if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
>         return V16SFmode;
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 2c1b6fbcd86..a0cfd611006 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -276,6 +276,11 @@ (define_mode_iterator V_128
>  (define_mode_iterator V_256
>    [V32QI V16HI V8SI V4DI V8SF V4DF])
>
> +;; All 256bit vector modes including HF vector mode
> +(define_mode_iterator V_256H
> +  [V32QI V16HI V8SI V4DI V8SF V4DF
> +   (V16HF "TARGET_AVX512F && TARGET_AVX512VL")])
> +
>  ;; All 128bit and 256bit vector modes
>  (define_mode_iterator V_128_256
>    [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
> @@ -9045,7 +9050,7 @@ (define_expand "avx512vl_vextractf128<mode>"
>
>  (define_expand "avx_vextractf128<mode>"
>    [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
> -   (match_operand:V_256 1 "register_operand")
> +   (match_operand:V_256H 1 "register_operand")
>     (match_operand:SI 2 "const_0_to_1_operand")]
>    "TARGET_AVX"
>  {
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
> new file mode 100644
> index 00000000000..0f82cf94932
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] + c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vaddph" 8 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
> new file mode 100644
> index 00000000000..217645692ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] / c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vdivph" 8 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
> new file mode 100644
> index 00000000000..e0409ce9d3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 128; i++)
> +    a[i] = b[i] / c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vdivph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
> new file mode 100644
> index 00000000000..d92a25dc255
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] / c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vdivph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
> new file mode 100644
> index 00000000000..974fca4ce09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 128; i++)
> +    a[i] = b[i] + c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vaddph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
> new file mode 100644
> index 00000000000..9bca9142df7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] + c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vaddph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
> new file mode 100644
> index 00000000000..e6f26f0aa40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] - c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vsubph" 8 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
> new file mode 100644
> index 00000000000..38f287b1dc0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 128; i++)
> +    a[i] = b[i] - c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vsubph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
> new file mode 100644
> index 00000000000..bc9f7870061
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] - c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vsubph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
> new file mode 100644
> index 00000000000..b4849cf77c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] * c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vmulph" 8 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
> new file mode 100644
> index 00000000000..71631b17cc3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 128; i++)
> +    a[i] = b[i] * c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vmulph" 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c 
> b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
> new file mode 100644
> index 00000000000..1be5c7f022f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
> +
> +/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
> +
> +void
> +foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
> +     _Float16 *__restrict__ c)
> +{
> +  for (int i = 0; i < 256; i++)
> +    a[i] = b[i] * c[i];
> +}
> +
> +/* { dg-final { scan-assembler-times "vmulph" 16 } } */
> --
> 2.18.1
>


-- 
BR,
Hongtao

From 02399fddf24a2d7db60feaa8027b9cf95687024b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sun, 27 Jan 2019 19:38:02 -0800
Subject: [PATCH 1/7] AVX512FP16: Enable _Float16 autovectorization

gcc/ChangeLog:

	* config/i386/i386-expand.c
	(ix86_avx256_split_vector_move_misalign): Handle V16HF mode.
	* config/i386/i386.c
	(ix86_preferred_simd_mode): Handle HF mode.
	* config/i386/sse.md (V_256H): New mode iterator.
	(avx_vextractf128<mode>): Use it.
	(VEC_INIT_MODE): Align vector HFmode conditions with those of
	vector HImodes, since no real HF instructions are used.
	(VEC_INIT_HALF_MODE): Ditto.
	(VIHF): Ditto.
	(VIHF_AVX512BW): Ditto.
	(*vec_extracthf): Ditto.
	(VEC_EXTRACT_MODE): Ditto.
	(PINSR_MODE): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vect-float16-1.c: New test.
	* gcc.target/i386/vect-float16-10.c: Ditto.
	* gcc.target/i386/vect-float16-11.c: Ditto.
	* gcc.target/i386/vect-float16-12.c: Ditto.
	* gcc.target/i386/vect-float16-2.c: Ditto.
	* gcc.target/i386/vect-float16-3.c: Ditto.
	* gcc.target/i386/vect-float16-4.c: Ditto.
	* gcc.target/i386/vect-float16-5.c: Ditto.
	* gcc.target/i386/vect-float16-6.c: Ditto.
	* gcc.target/i386/vect-float16-7.c: Ditto.
	* gcc.target/i386/vect-float16-8.c: Ditto.
	* gcc.target/i386/vect-float16-9.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 |  4 ++++
 gcc/config/i386/i386.c                        | 14 +++++++++++
 gcc/config/i386/sse.md                        | 24 +++++++++----------
 .../gcc.target/i386/vect-float16-1.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-10.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-11.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-12.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-2.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-3.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-4.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-5.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-6.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-7.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-8.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-9.c          | 14 +++++++++++
 15 files changed, 198 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 0c1aec585fe..cac8354a067 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
       extract = gen_avx_vextractf128v32qi;
       mode = V16QImode;
       break;
+    case E_V16HFmode:
+      extract = gen_avx_vextractf128v16hf;
+      mode = V8HFmode;
+      break;
     case E_V8SFmode:
       extract = gen_avx_vextractf128v8sf;
       mode = V4SFmode;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dc649f96d0d..7b173bc0beb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
       else
 	return V2DImode;
 
+    case E_HFmode:
+      if (TARGET_AVX512FP16)
+	{
+	  if (TARGET_AVX512VL)
+	    {
+	      if (TARGET_PREFER_AVX128)
+		return V8HFmode;
+	      else if (TARGET_PREFER_AVX256)
+		return V16HFmode;
+	    }
+	  return V32HFmode;
+	}
+      return word_mode;
+
     case E_SFmode:
       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
 	return V16SFmode;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06339163bc5..26024609e2b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,6 +279,10 @@ (define_mode_iterator V_128
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 256bit vector modes including HF vector mode
+(define_mode_iterator V_256H
+  [V32QI V16HI V8SI V4DI V8SF V4DF V16HF])
+
 ;; All 128bit and 256bit vector modes
 (define_mode_iterator V_128_256
   [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
@@ -406,8 +410,7 @@ (define_mode_iterator VIHF
    (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
    (V8SI "TARGET_AVX") V4SI
    (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")])
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF])
 
 (define_mode_iterator VI_AVX2
   [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
@@ -752,7 +755,7 @@ (define_mode_iterator VI_AVX512BW
   [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
 (define_mode_iterator VIHF_AVX512BW
   [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
-  (V32HF "TARGET_AVX512FP16")])
+  (V32HF "TARGET_AVX512BW")])
 
 ;; Int-float size matches
 (define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -9381,7 +9384,7 @@ (define_expand "avx512vl_vextractf128<mode>"
 
 (define_expand "avx_vextractf128<mode>"
   [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
-   (match_operand:V_256 1 "register_operand")
+   (match_operand:V_256H 1 "register_operand")
    (match_operand:SI 2 "const_0_to_1_operand")]
   "TARGET_AVX"
 {
@@ -9868,7 +9871,7 @@ (define_insn "*vec_extracthf"
 	  (match_operand:V8HF 1 "register_operand" "v,v")
 	  (parallel
 	    [(match_operand:SI 2 "const_0_to_7_operand")])))]
-  "TARGET_AVX512FP16"
+  "TARGET_SSE2"
   "@
    vpextrw\t{%2, %1, %k0|%k0, %1, %2}
    vpextrw\t{%2, %1, %0|%0, %1, %2}"
@@ -9882,8 +9885,7 @@ (define_mode_iterator VEC_EXTRACT_MODE
    (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -15615,7 +15617,7 @@ (define_expand "vec_interleave_low<mode>"
 
 ;; Modes handled by pinsr patterns.
 (define_mode_iterator PINSR_MODE
-  [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16")
+  [(V16QI "TARGET_SSE4_1") V8HI V8HF
    (V4SI "TARGET_SSE4_1")
    (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
 
@@ -23723,8 +23725,7 @@ (define_mode_iterator VEC_INIT_MODE
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -23736,8 +23737,7 @@ (define_mode_iterator VEC_INIT_HALF_MODE
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
    (V4TI "TARGET_AVX512F")])
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
new file mode 100644
index 00000000000..0f82cf94932
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
new file mode 100644
index 00000000000..217645692ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
new file mode 100644
index 00000000000..e0409ce9d3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
new file mode 100644
index 00000000000..d92a25dc255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
new file mode 100644
index 00000000000..974fca4ce09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
new file mode 100644
index 00000000000..9bca9142df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
new file mode 100644
index 00000000000..e6f26f0aa40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
new file mode 100644
index 00000000000..38f287b1dc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
new file mode 100644
index 00000000000..bc9f7870061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
new file mode 100644
index 00000000000..b4849cf77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
new file mode 100644
index 00000000000..71631b17cc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
new file mode 100644
index 00000000000..1be5c7f022f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
-- 
2.27.0

From 02399fddf24a2d7db60feaa8027b9cf95687024b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sun, 27 Jan 2019 19:38:02 -0800
Subject: [PATCH 1/7] AVX512FP16: Enable _Float16 autovectorization

gcc/ChangeLog:

	* config/i386/i386-expand.c
	(ix86_avx256_split_vector_move_misalign): Handle V16HF mode.
	* config/i386/i386.c
	(ix86_preferred_simd_mode): Handle HF mode.
	* config/i386/sse.md (V_256H): New mode iterator.
	(avx_vextractf128<mode>): Use it.
	(VEC_INIT_MODE): Align vector HFmode conditions with those of
	vector HImodes, since no real HF instructions are used.
	(VEC_INIT_HALF_MODE): Ditto.
	(VIHF): Ditto.
	(VIHF_AVX512BW): Ditto.
	(*vec_extracthf): Ditto.
	(VEC_EXTRACT_MODE): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vect-float16-1.c: New test.
	* gcc.target/i386/vect-float16-10.c: Ditto.
	* gcc.target/i386/vect-float16-11.c: Ditto.
	* gcc.target/i386/vect-float16-12.c: Ditto.
	* gcc.target/i386/vect-float16-2.c: Ditto.
	* gcc.target/i386/vect-float16-3.c: Ditto.
	* gcc.target/i386/vect-float16-4.c: Ditto.
	* gcc.target/i386/vect-float16-5.c: Ditto.
	* gcc.target/i386/vect-float16-6.c: Ditto.
	* gcc.target/i386/vect-float16-7.c: Ditto.
	* gcc.target/i386/vect-float16-8.c: Ditto.
	* gcc.target/i386/vect-float16-9.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 |  4 ++++
 gcc/config/i386/i386.c                        | 14 +++++++++++
 gcc/config/i386/sse.md                        | 24 +++++++++----------
 .../gcc.target/i386/vect-float16-1.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-10.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-11.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-12.c         | 14 +++++++++++
 .../gcc.target/i386/vect-float16-2.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-3.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-4.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-5.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-6.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-7.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-8.c          | 14 +++++++++++
 .../gcc.target/i386/vect-float16-9.c          | 14 +++++++++++
 15 files changed, 198 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 0c1aec585fe..cac8354a067 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
       extract = gen_avx_vextractf128v32qi;
       mode = V16QImode;
       break;
+    case E_V16HFmode:
+      extract = gen_avx_vextractf128v16hf;
+      mode = V8HFmode;
+      break;
     case E_V8SFmode:
       extract = gen_avx_vextractf128v8sf;
       mode = V4SFmode;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dc649f96d0d..7b173bc0beb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
       else
 	return V2DImode;
 
+    case E_HFmode:
+      if (TARGET_AVX512FP16)
+	{
+	  if (TARGET_AVX512VL)
+	    {
+	      if (TARGET_PREFER_AVX128)
+		return V8HFmode;
+	      else if (TARGET_PREFER_AVX256)
+		return V16HFmode;
+	    }
+	  return V32HFmode;
+	}
+      return word_mode;
+
     case E_SFmode:
       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
 	return V16SFmode;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06339163bc5..26024609e2b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,6 +279,10 @@ (define_mode_iterator V_128
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 256bit vector modes including HF vector mode
+(define_mode_iterator V_256H
+  [V32QI V16HI V8SI V4DI V8SF V4DF V16HF])
+
 ;; All 128bit and 256bit vector modes
 (define_mode_iterator V_128_256
   [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
@@ -406,8 +410,7 @@ (define_mode_iterator VIHF
    (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
    (V8SI "TARGET_AVX") V4SI
    (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")])
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF])
 
 (define_mode_iterator VI_AVX2
   [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
@@ -752,7 +755,7 @@ (define_mode_iterator VI_AVX512BW
   [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
 (define_mode_iterator VIHF_AVX512BW
   [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
-  (V32HF "TARGET_AVX512FP16")])
+  (V32HF "TARGET_AVX512BW")])
 
 ;; Int-float size matches
 (define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -9381,7 +9384,7 @@ (define_expand "avx512vl_vextractf128<mode>"
 
 (define_expand "avx_vextractf128<mode>"
   [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
-   (match_operand:V_256 1 "register_operand")
+   (match_operand:V_256H 1 "register_operand")
    (match_operand:SI 2 "const_0_to_1_operand")]
   "TARGET_AVX"
 {
@@ -9868,7 +9871,7 @@ (define_insn "*vec_extracthf"
 	  (match_operand:V8HF 1 "register_operand" "v,v")
 	  (parallel
 	    [(match_operand:SI 2 "const_0_to_7_operand")])))]
-  "TARGET_AVX512FP16"
+  "TARGET_SSE2"
   "@
    vpextrw\t{%2, %1, %k0|%k0, %1, %2}
    vpextrw\t{%2, %1, %0|%0, %1, %2}"
@@ -9882,8 +9885,7 @@ (define_mode_iterator VEC_EXTRACT_MODE
    (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -15615,7 +15617,7 @@ (define_expand "vec_interleave_low<mode>"
 
 ;; Modes handled by pinsr patterns.
 (define_mode_iterator PINSR_MODE
-  [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16")
+  [(V16QI "TARGET_SSE4_1") V8HI V8HF
    (V4SI "TARGET_SSE4_1")
    (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
 
@@ -23723,8 +23725,7 @@ (define_mode_iterator VEC_INIT_MODE
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -23736,8 +23737,7 @@ (define_mode_iterator VEC_INIT_HALF_MODE
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
    (V4TI "TARGET_AVX512F")])
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
new file mode 100644
index 00000000000..0f82cf94932
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
new file mode 100644
index 00000000000..217645692ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
new file mode 100644
index 00000000000..e0409ce9d3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
new file mode 100644
index 00000000000..d92a25dc255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
new file mode 100644
index 00000000000..974fca4ce09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
new file mode 100644
index 00000000000..9bca9142df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
new file mode 100644
index 00000000000..e6f26f0aa40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
new file mode 100644
index 00000000000..38f287b1dc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
new file mode 100644
index 00000000000..bc9f7870061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
new file mode 100644
index 00000000000..b4849cf77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
new file mode 100644
index 00000000000..71631b17cc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
new file mode 100644
index 00000000000..1be5c7f022f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
-- 
2.27.0

Re: [PATCH 09/62] AVX512FP16: Enable _Float16 autovectorization

Reply via email to