Yes, here is the updated patch. gcc/ChangeLog: PR target/102812 * config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector move to use the same logic as HImode.
gcc/testsuite/ChangeLog: PR target/102812 * gcc.target/i386/pr102812.c: New test. --- gcc/config/i386/i386.c | 15 ++++++++++++--- gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9cc903e826b..159684ce549 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5399,9 +5399,18 @@ ix86_get_ssemov (rtx *operands, unsigned size, switch (scalar_mode) { case E_HFmode: - opcode = (misaligned_p - ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") - : "vmovdqa64"); + if (evex_reg_p) + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu16" + : "vmovdqu64") + : "vmovdqa64"); + else + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu16" + : "%vmovdqu") + : "%vmovdqa"); break; case E_SFmode: opcode = misaligned_p ? "%vmovups" : "%vmovaps"; diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c b/gcc/testsuite/gcc.target/i386/pr102812.c new file mode 100644 index 00000000000..bad4fa9394e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102812.c @@ -0,0 +1,12 @@ +/* PR target/102812 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mno-avx" } */ +/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */ +/* { dg-final { scan-assembler "movdqa\t" } } */ + +typedef _Float16 v8hf __attribute__((__vector_size__ (16))); + +v8hf t (_Float16 a) +{ + return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0}; +} -- 2.18.1 Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月21日周四 下午1:24写道: > > On Wed, Oct 20, 2021 at 1:31 PM Hongyu Wang via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Since _Float16 type is enabled under sse2 target, returning > > V8HFmode vector without AVX512F target would generate wrong > > vmovdqa64 instruction. Adjust ix86_get_ssemov to avoid this. > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde. > > > > OK for master? 
> > > > gcc/ChangeLog: > > PR target/102812 > > * config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector > > move without AVX512F target. > > > > gcc/testsuite/ChangeLog: > > PR target/102812 > > * gcc.target/i386/pr102812.c: New test. > > --- > > gcc/config/i386/i386.c | 9 ++++++--- > > gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++ > > 2 files changed, 18 insertions(+), 3 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > index 9cc903e826b..1d79180da9a 100644 > > --- a/gcc/config/i386/i386.c > > +++ b/gcc/config/i386/i386.c > > @@ -5399,9 +5399,12 @@ ix86_get_ssemov (rtx *operands, unsigned size, > > switch (scalar_mode) > > { > > case E_HFmode: > > - opcode = (misaligned_p > > - ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") > > - : "vmovdqa64"); > > + if (!TARGET_AVX512F) > > + opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; > > + else > > + opcode = (misaligned_p > > + ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") > > + : "vmovdqa64"); > > break; > Could we just use similar logic as HI? > > case E_HImode: > if (evex_reg_p) > opcode = (need_unaligned_p > ? (TARGET_AVX512BW > ? "vmovdqu16" > : "vmovdqu64") > : "vmovdqa64"); > else > opcode = (need_unaligned_p > ? (TARGET_AVX512BW > ? "vmovdqu16" > : "%vmovdqu") > : "%vmovdqa"); > break; > > > case E_SFmode: > > opcode = misaligned_p ? 
"%vmovups" : "%vmovaps"; > > diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c > > b/gcc/testsuite/gcc.target/i386/pr102812.c > > new file mode 100644 > > index 00000000000..bad4fa9394e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr102812.c > > @@ -0,0 +1,12 @@ > > +/* PR target/102812 */ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -msse4 -mno-avx" } */ > > +/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */ > > +/* { dg-final { scan-assembler "movdqa\t" } } */ > > + > > +typedef _Float16 v8hf __attribute__((__vector_size__ (16))); > > + > > +v8hf t (_Float16 a) > > +{ > > + return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0}; > > +} > > -- > > 2.18.1 > > > > > -- > BR, > Hongtao