Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi: > If SRC had been assigned a mode narrower than the copy, we can't link > DEST into the chain even if they have the same > hard_regno_nregs (i.e. HImode/SImode in the i386 backend).
In general, changes between modes within the same hard register are OK. Could you explain in more detail what's going wrong? Thanks, Richard > > i.e > kmovw %k0, %edi > vmovd %edi, %xmm2 > vpshuflw $0, %xmm2, %xmm0 > kmovw %k0, %r8d > kmovd %k0, %r9d > ... > - movl %r9d, %r11d > + vmovd %xmm2, %r11d > > Bootstrap and regtested on x86_64-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > > PR rtl-optimization/98694 > * regcprop.c (copy_value): If SRC had been assigned a mode > narrower than the copy, we can't link DEST into the chain even > they have same hard_regno_nregs(i.e. HImode/SImode in i386 > backend). > > gcc/testsuite/ChangeLog: > > PR rtl-optimization/98694 > * gcc.target/i386/pr98694.c: New test. > > --- > gcc/regcprop.c | 3 +- > gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++ > 2 files changed, 40 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c > > diff --git a/gcc/regcprop.c b/gcc/regcprop.c > index dd62cb36013..997516eca07 100644 > --- a/gcc/regcprop.c > +++ b/gcc/regcprop.c > @@ -355,7 +355,8 @@ copy_value (rtx dest, rtx src, struct value_data *vd) > /* If SRC had been assigned a mode narrower than the copy, we can't > link DEST into the chain, because not all of the pieces of the > copy came from oldest_regno. */ > - else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) > + else if (sn > hard_regno_nregs (sr, vd->e[sr].mode) > + || partial_subreg_p (vd->e[sr].mode, GET_MODE (src))) > return; > > /* Link DR at the end of the value chain used by SR. */ > diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c > b/gcc/testsuite/gcc.target/i386/pr98694.c > new file mode 100644 > index 00000000000..611f9e77627 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr98694.c > @@ -0,0 +1,38 @@ > +/* PR rtl-optimization/98694 */ > +/* { dg-do run { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mavx512bw" } */ > +/* { dg-require-effective-target avx512bw } */ > + > +#include<immintrin.h> > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef int v2si __attribute__ ((vector_size (8))); > +v4hi b; > + > +__attribute__ ((noipa)) > +v2si > +foo (__m512i src1, __m512i src2) > +{ > + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); > + short s = (short) m; > + int i = (int)m; > + b = __extension__ (v4hi) {s, s, s, s}; > + return __extension__ (v2si) {i, i}; > +} > + > +int main () > +{ > + __m512i src1 = _mm512_setzero_si512 (); > + __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1); > + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); > + v2si a = foo (src1, src2); > + if (a[0] != (int)m) > + __builtin_abort (); > + return 0; > +} > --