> >>> On 13.12.17 at 10:33, wrote: > > Simply mirror the MODE_XI logic of handling unaligned operands in > > mov<mode>_internal into MODE_TI / MODE_OI handling. > > > > gcc/ > > 2017-12-13 Jan Beulich <jbeul...@suse.com> > > > > * sse.md (mov<mode>_internal): Tighten condition for when to use > > vmovdqu<ssescalarsize> for TI and OI modes. > > > > gcc/testsuite/ > > 2017-12-13 Jan Beulich <jbeul...@suse.com> > > > > * gcc.target/i386/avx512vl-no-vmovdqu8.c, > > gcc.target/i386/avx512vl-no-vmovdqu16.c: New.
Looks OK. We do not need to update the instruction attribute because we make no distinction between those instructions and both end up being ssemov? Honza > > > > --- > > I'm also being puzzled by the code being generated for the 256-bit cases > > (which shouldn't differ much from the 128-bit ones). > > > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -1005,8 +1005,14 @@ > > case MODE_TI: > > if (misaligned_operand (operands[0], <MODE>mode) > > || misaligned_operand (operands[1], <MODE>mode)) > > - return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" > > - : "%vmovdqu\t{%1, %0|%0, %1}"; > > + return TARGET_AVX512VL > > + && (<MODE>mode == V4SImode > > + || <MODE>mode == V2DImode > > + || <MODE>mode == V8SImode > > + || <MODE>mode == V4DImode > > + || TARGET_AVX512BW) > > + ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" > > + : "%vmovdqu\t{%1, %0|%0, %1}"; > > else > > return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}" > > : "%vmovdqa\t{%1, %0|%0, %1}"; > > --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c > > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c > > @@ -0,0 +1,24 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ > > + > > +typedef unsigned int __attribute__((mode(HI), vector_size(16))) v8hi_t; > > +typedef unsigned int __attribute__((mode(HI), vector_size(32))) v16hi_t; > > + > > +struct s8hi { > > + int i; > > + v8hi_t __attribute__((packed)) v; > > +}; > > +struct s16hi { > > + int i; > > + v16hi_t __attribute__((packed)) v; > > +}; > > + > > +void f8hi(struct s8hi*p1, const struct s8hi*p2) { > > + p1->v += p2->v; > > +} > > + > > +void f16hi(struct s16hi*p1, const struct s16hi*p2) { > > + p1->v += p2->v; > > +} > > + > > +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */ > > --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c > > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c > > @@ -0,0 +1,24 @@ > > +/* { dg-do 
compile } */ > > +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ > > + > > +typedef unsigned int __attribute__((mode(QI), vector_size(16))) v16qi_t; > > +typedef unsigned int __attribute__((mode(QI), vector_size(32))) v32qi_t; > > + > > +struct s16qi { > > + int i; > > + v16qi_t __attribute__((packed)) v; > > +}; > > +struct s32qi { > > + int i; > > + v32qi_t __attribute__((packed)) v; > > +}; > > + > > +void f16qi(struct s16qi*p1, const struct s16qi*p2) { > > + p1->v += p2->v; > > +} > > + > > +void f32qi(struct s32qi*p1, const struct s32qi*p2) { > > + p1->v += p2->v; > > +} > > + > > +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */ > > > > > > > >