On Wed, Oct 20, 2021 at 1:31 PM Hongyu Wang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Since _Float16 type is enabled under sse2 target, returning > V8HFmode vector without AVX512F target would generate wrong > vmovdqa64 instruction. Adjust ix86_get_ssemov to avoid this. > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde. > > OK for master? > > gcc/ChangeLog: > PR target/102812 > * config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector > move without AVX512F target. > > gcc/testsuite/ChangeLog: > PR target/102812 > * gcc.target/i386/pr102812.c: New test. > --- > gcc/config/i386/i386.c | 9 ++++++--- > gcc/testsuite/gcc.target/i386/pr102812.c | 12 ++++++++++++ > 2 files changed, 18 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr102812.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 9cc903e826b..1d79180da9a 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -5399,9 +5399,12 @@ ix86_get_ssemov (rtx *operands, unsigned size, > switch (scalar_mode) > { > case E_HFmode: > - opcode = (misaligned_p > - ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") > - : "vmovdqa64"); > + if (!TARGET_AVX512F) > + opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; > + else > + opcode = (misaligned_p > + ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") > + : "vmovdqa64"); > break; Could we just use logic similar to the E_HImode case?
case E_HImode: if (evex_reg_p) opcode = (need_unaligned_p ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") : "vmovdqa64"); else opcode = (need_unaligned_p ? (TARGET_AVX512BW ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); break; > case E_SFmode: > opcode = misaligned_p ? "%vmovups" : "%vmovaps"; > diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c > b/gcc/testsuite/gcc.target/i386/pr102812.c > new file mode 100644 > index 00000000000..bad4fa9394e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr102812.c > @@ -0,0 +1,12 @@ > +/* PR target/102812 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse4 -mno-avx" } */ > +/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */ > +/* { dg-final { scan-assembler "movdqa\t" } } */ > + > +typedef _Float16 v8hf __attribute__((__vector_size__ (16))); > + > +v8hf t (_Float16 a) > +{ > + return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0}; > +} > -- > 2.18.1 > -- BR, Hongtao