On Thu, Jan 30, 2025 at 05:11:54PM +0100, Jakub Jelinek wrote:
> Hi!
>
> The following testcase is miscompiled on s390x-linux with e.g. -march=z13
> (both -O0 and -O2) starting with r15-7053.
> The problem is in the splitters which emulate TImode/V1TImode GT and GTU
> comparisons.
> For GT we want to do
> (ior (gt (hi op1) (hi op2))
>      (and (eq (hi op1) (hi op2)) (gtu (lo op1) (lo op2))))
> and for GTU similarly except for gtu instead of gt in there.
> Now, the splitter emulation is using V2DImode comparisons where on s390x
> the hi part is in the first element of the vector, lo part in the second,
> and for the gtu case it swaps the elements of the vector.
> So, we get the right result in the first element of the result vector.
> But vrepg was then broadcasting the second element of the result vector
> rather than the first, and the value of the second element of the vector
> is instead
> (ior (gt (lo op1) (lo op2))
>      (and (eq (lo op1) (lo op2)) (gtu (hi op1) (hi op2))))
> so something not really usable for the emulated comparison.
>
> The following patch fixes that. The testcase tries to test behavior of
> double-word smin/smax/umin/umax with various cases of the halves of both
> operands (one that is sometimes EQ, sometimes GT, sometimes LT, sometimes
> GTU, sometimes LTU).
>
> Stefan has successfully bootstrapped/regtested this on s390x-linux (thanks
> for that; I'm still in stage3 of LTO profiledbootstrap), ok for trunk?

Ok and thanks again for fixing this so quickly! Cheers, Stefan > > 2025-01-30 Jakub Jelinek <ja...@redhat.com> > Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org> > > PR target/118696 > * config/s390/vector.md (*vec_cmpgt<mode><mode>_nocc_emu, > *vec_cmpgtu<mode><mode>_nocc_emu): Duplicate the first rather than > second V2DImode element. > > * gcc.dg/pr118696.c: New test. > * gcc.target/s390/vector/pr118696.c: New test. > * gcc.target/s390/vector/vec-abs-emu.c: Expect vrepg with 0 as last > operand rather than 1. > * gcc.target/s390/vector/vec-max-emu.c: Likewise. > * gcc.target/s390/vector/vec-min-emu.c: Likewise. > > --- gcc/config/s390/vector.md.jj 2025-01-24 17:37:48.987458141 +0100 > +++ gcc/config/s390/vector.md 2025-01-30 09:10:53.413542300 +0100 > @@ -2166,7 +2166,7 @@ (define_insn_and_split "*vec_cmpgt<mode> > (vec_duplicate:V2DI > (vec_select:DI > (match_dup 4) > - (parallel [(const_int 1)])))) > + (parallel [(const_int 0)])))) > (set (match_dup 0) > (subreg:<MODE> (match_dup 4) 0))] > { > @@ -2198,7 +2198,7 @@ (define_insn_and_split "*vec_cmpgtu<mode > (vec_duplicate:V2DI > (vec_select:DI > (match_dup 4) > - (parallel [(const_int 1)])))) > + (parallel [(const_int 0)])))) > (set (match_dup 0) > (subreg:<MODE> (match_dup 4) 0))] > { > --- gcc/testsuite/gcc.dg/pr118696.c.jj 2025-01-30 09:52:52.064679434 > +0100 > +++ gcc/testsuite/gcc.dg/pr118696.c 2025-01-30 09:52:33.430936447 +0100 > @@ -0,0 +1,131 @@ > +/* PR target/118696 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#if __CHAR_BIT__ == 8 > +#if __SIZEOF_INT128__ == 16 && __SIZEOF_LONG_LONG__ == 8 > +#define D __int128 > +#define S long long > +#define M 0x8000000000000000ULL > +#define C 64 > +#elif __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 > +#define D long long > +#define S int > +#define M 0x80000000U > +#define C 32 > +#endif > +#endif > + > +#ifdef D > +static inline D > +combine (unsigned S x, unsigned S y) > +{ > + return (unsigned D) x << C | y; > +} > + > 
+__attribute__((noipa)) D > +smin (D x, D y) > +{ > + return x < y ? x : y; > +} > + > +__attribute__((noipa)) D > +smax (D x, D y) > +{ > + return x > y ? x : y; > +} > + > +__attribute__((noipa)) unsigned D > +umin (unsigned D x, unsigned D y) > +{ > + return x < y ? x : y; > +} > + > +__attribute__((noipa)) unsigned D > +umax (unsigned D x, unsigned D y) > +{ > + return x > y ? x : y; > +} > +#endif > + > +int > +main () > +{ > +#ifdef D > + unsigned S vals[] = { > + 0, 12, 42, M, M | 12, M | 42 > + }; > + unsigned char expected[] = { > + 4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,2,2,2,2,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2, > + 2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2, > + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3, > + 3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,4,3,3,3,3,3,3, > + 3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,4,3, > + 3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, > + 0,0,0,4,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, > + 0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,2,2,2,2,2,2, > + 2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,2,2, > + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3, > + 3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, > + 0,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0, > + 0,0,0,0,0,0,4,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, > + 0,0,0,0,0,0,0,0,0,0,0,4,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, > + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,2,2,2,2,2,2,2,2,2,2, > + 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,3,3, > + 
3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,4, > + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, > + 1,1,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1, > + 1,1,1,1,1,1,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1, > + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1, > + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3, > + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,4,3,3,3,3,3,3,3, > + 3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,4,3,3, > + 3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, > + 0,0,4,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0, > + 0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, > + 1,1,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1, > + 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,1,1,1,1,1,1,1,1, > + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,1,1,1,1, > + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3, > + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, > + 4,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, > + 0,0,0,0,0,4,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, > + 0,0,0,0,0,0,0,0,0,0,4,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0, > + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 > + }; > + int m = 0; > + for (int i = 0; i < 6; ++i) > + for (int j = 0; j < 6; ++j) > + for (int k = 0; k < 6; ++k) > + for (int l = 0; l < 6; ++l) > + { > + D a = combine (vals[i], vals[j]); > + D b = combine (vals[k], vals[l]); > + int r = 0; > + r |= (smin (a, b) == a) * 1; > + r |= (smin (a, b) == b) * 2; > + r |= (smax (a, b) == a) * 4; > + r |= (smax (a, b) == b) * 8; > + r |= (umin (a, b) == a) * 16; > + r |= (umin (a, b) == b) * 32; > + r |= (umax (a, b) == a) * 64; > + r |= (umax (a, b) == b) * 128; > + switch (r) > + { > + case 102: r = 0; break; > + case 105: r = 1; break; > + case 150: r = 2; break; > + case 153: r = 3; break; > + 
case 255: r = 4; break; > + default: __builtin_abort (); > + } > + if (r != expected[m++]) > + __builtin_abort (); > + } > +#endif > +} > --- gcc/testsuite/gcc.target/s390/vector/pr118696.c.jj 2025-01-30 > 09:55:53.134181945 +0100 > +++ gcc/testsuite/gcc.target/s390/vector/pr118696.c 2025-01-30 > 09:55:47.433260572 +0100 > @@ -0,0 +1,5 @@ > +/* PR target/118696 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mzarch -march=z13" } */ > + > +#include "../../../gcc.dg/pr118696.c" > --- gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c.jj 2025-01-20 > 10:23:38.572601427 +0100 > +++ gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c 2025-01-30 > 16:58:51.853928339 +0100 > @@ -18,7 +18,7 @@ typedef __attribute__ ((vector_size (16) > ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ > -** vrepg %v[0-9]+,%v[0-9]+,1 > +** vrepg %v[0-9]+,%v[0-9]+,0 > ** vsq %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ > ** br %r14 > --- gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c.jj 2025-01-20 > 10:23:38.572601427 +0100 > +++ gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c 2025-01-30 > 16:59:06.279729311 +0100 > @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16) > ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ > -** vrepg %v[0-9]+,%v[0-9]+,1 > +** vrepg %v[0-9]+,%v[0-9]+,0 > ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ > ** br %r14 > */ > --- gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c.jj 2025-01-20 > 10:23:38.572601427 +0100 > +++ gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c 2025-01-30 > 16:59:00.054815191 +0100 > @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16) > ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ > ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ > -** vrepg %v[0-9]+,%v[0-9]+,1 > +** vrepg %v[0-9]+,%v[0-9]+,0 > ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ > ** br %r14 > */ > > Jakub >