Yes, updated patch.

gcc/ChangeLog:
PR target/102812
* config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector
move to use the same logic as HImode.

gcc/testsuite/ChangeLog:
PR target/102812
* gcc.target/i386/pr102812.c: New test.
---
 gcc/config/i386/i386.c                   | 15 ++++++++++++---
 gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9cc903e826b..159684ce549 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5399,9 +5399,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
       switch (scalar_mode)
  {
  case E_HFmode:
-   opcode = (misaligned_p
-     ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
-     : "vmovdqa64");
+   if (evex_reg_p)
+     opcode = (misaligned_p
+       ? (TARGET_AVX512BW
+          ? "vmovdqu16"
+          : "vmovdqu64")
+       : "vmovdqa64");
+   else
+     opcode = (misaligned_p
+       ? (TARGET_AVX512BW
+          ? "vmovdqu16"
+          : "%vmovdqu")
+       : "%vmovdqa");
    break;
  case E_SFmode:
    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c b/gcc/testsuite/gcc.target/i386/pr102812.c
new file mode 100644
index 00000000000..bad4fa9394e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102812.c
@@ -0,0 +1,12 @@
+/* PR target/102812 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4 -mno-avx" } */
+/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */
+/* { dg-final { scan-assembler "movdqa\t" } } */
+
+typedef _Float16 v8hf __attribute__((__vector_size__ (16)));
+
+v8hf t (_Float16 a)
+{
+    return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0};
+}
-- 
2.18.1

Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月21日周四 下午1:24写道:
>
> On Wed, Oct 20, 2021 at 1:31 PM Hongyu Wang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Since the _Float16 type is enabled for SSE2 targets, returning a
> > V8HFmode vector on a target without AVX512F would generate an invalid
> > vmovdqa64 instruction. Adjust ix86_get_ssemov to avoid this.
> >
> > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde.
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >         PR target/102812
> >         * config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector
> >         move without AVX512F target.
> >
> > gcc/testsuite/ChangeLog:
> >         PR target/102812
> >         * gcc.target/i386/pr102812.c: New test.
> > ---
> >  gcc/config/i386/i386.c                   |  9 ++++++---
> >  gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++
> >  2 files changed, 18 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 9cc903e826b..1d79180da9a 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5399,9 +5399,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> >        switch (scalar_mode)
> >         {
> >         case E_HFmode:
> > -         opcode = (misaligned_p
> > -                   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> > -                   : "vmovdqa64");
> > +         if (!TARGET_AVX512F)
> > +           opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
> > +         else
> > +           opcode = (misaligned_p
> > +                     ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> > +                     : "vmovdqa64");
> >           break;
> Could we just use logic similar to the HImode case?
>
>         case E_HImode:
>           if (evex_reg_p)
>             opcode = (need_unaligned_p
>                       ? (TARGET_AVX512BW
>                          ? "vmovdqu16"
>                          : "vmovdqu64")
>                       : "vmovdqa64");
>           else
>             opcode = (need_unaligned_p
>                       ? (TARGET_AVX512BW
>                          ? "vmovdqu16"
>                          : "%vmovdqu")
>                       : "%vmovdqa");
>           break;
>
> >         case E_SFmode:
> >           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c b/gcc/testsuite/gcc.target/i386/pr102812.c
> > new file mode 100644
> > index 00000000000..bad4fa9394e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102812.c
> > @@ -0,0 +1,12 @@
> > +/* PR target/102812 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -msse4 -mno-avx" } */
> > +/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */
> > +/* { dg-final { scan-assembler "movdqa\t" } } */
> > +
> > +typedef _Float16 v8hf __attribute__((__vector_size__ (16)));
> > +
> > +v8hf t (_Float16 a)
> > +{
> > +    return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0};
> > +}
> > --
> > 2.18.1
> >
>
>
> --
> BR,
> Hongtao

Reply via email to