On Wed, Oct 20, 2021 at 1:31 PM Hongyu Wang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Since the _Float16 type is enabled for SSE2 targets, returning a
> V8HFmode vector on a target without AVX512F would generate an invalid
> vmovdqa64 instruction. Adjust ix86_get_ssemov to avoid this.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde.
>
> OK for master?
>
> gcc/ChangeLog:
>         PR target/102812
>         * config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector
>         move without AVX512F target.
>
> gcc/testsuite/ChangeLog:
>         PR target/102812
>         * gcc.target/i386/pr102812.c: New test.
> ---
>  gcc/config/i386/i386.c                   |  9 ++++++---
>  gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++
>  2 files changed, 18 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 9cc903e826b..1d79180da9a 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5399,9 +5399,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
>        switch (scalar_mode)
>         {
>         case E_HFmode:
> -         opcode = (misaligned_p
> -                   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> -                   : "vmovdqa64");
> +         if (!TARGET_AVX512F)
> +           opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
> +         else
> +           opcode = (misaligned_p
> +                     ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> +                     : "vmovdqa64");
>           break;
Could we just use logic similar to the HImode case?

        case E_HImode:
          if (evex_reg_p)
            opcode = (need_unaligned_p
                      ? (TARGET_AVX512BW
                         ? "vmovdqu16"
                         : "vmovdqu64")
                      : "vmovdqa64");
          else
            opcode = (need_unaligned_p
                      ? (TARGET_AVX512BW
                         ? "vmovdqu16"
                         : "%vmovdqu")
                      : "%vmovdqa");
          break;

>         case E_SFmode:
>           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
> diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c b/gcc/testsuite/gcc.target/i386/pr102812.c
> new file mode 100644
> index 00000000000..bad4fa9394e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102812.c
> @@ -0,0 +1,12 @@
> +/* PR target/102812 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4 -mno-avx" } */
> +/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */
> +/* { dg-final { scan-assembler "movdqa\t" } } */
> +
> +typedef _Float16 v8hf __attribute__((__vector_size__ (16)));
> +
> +v8hf t (_Float16 a)
> +{
> +    return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0};
> +}
> --
> 2.18.1
>


-- 
BR,
Hongtao

Reply via email to