On Thu, Jan 30, 2025 at 05:11:54PM +0100, Jakub Jelinek wrote:
> Hi!
> 
> The following testcase is miscompiled on s390x-linux with e.g. -march=z13
> (both -O0 and -O2) starting with r15-7053.
> The problem is in the splitters which emulate TImode/V1TImode GT and GTU
> comparisons.
> For GT we want to do
> (ior (gt (hi op1) (hi op2))
>      (and (eq (hi op1) (hi op2)) (gtu (lo op1) (lo op2))))
> and for GTU similarly except for gtu instead of gt in there.
> Now, the splitter emulation is using V2DImode comparisons where on s390x
> the hi part is in the first element of the vector, lo part in the second,
> and for the gtu case it swaps the elements of the vector.
> So, we get the right result in the first element of the result vector.
> But vrepg was then broadcasting the second element of the result vector
> rather than the first, and the second element of the result vector instead
> holds
> (ior (gt (lo op1) (lo op2))
>      (and (eq (lo op1) (lo op2)) (gtu (hi op1) (hi op2))))
> which is not usable for the emulated comparison.
> 
> The following patch fixes that.  The testcase tests the behavior of
> double-word smin/smax/umin/umax with various combinations of the halves of
> both operands (corresponding halves are sometimes EQ, sometimes GT,
> sometimes LT, sometimes GTU, sometimes LTU).
> 
> Stefan has successfully bootstrapped/regtested this on s390x-linux (thanks
> for that; I'm still in stage3 of LTO profiledbootstrap), ok for trunk?

Ok and thanks again for fixing this so quickly!

Cheers,
Stefan

> 
> 2025-01-30  Jakub Jelinek  <ja...@redhat.com>
>           Stefan Schulze Frielinghaus  <stefa...@gcc.gnu.org>
> 
>       PR target/118696
>       * config/s390/vector.md (*vec_cmpgt<mode><mode>_nocc_emu,
>       *vec_cmpgtu<mode><mode>_nocc_emu): Duplicate the first rather than
>       second V2DImode element.
> 
>       * gcc.dg/pr118696.c: New test.
>       * gcc.target/s390/vector/pr118696.c: New test.
>       * gcc.target/s390/vector/vec-abs-emu.c: Expect vrepg with 0 as last
>       operand rather than 1.
>       * gcc.target/s390/vector/vec-max-emu.c: Likewise.
>       * gcc.target/s390/vector/vec-min-emu.c: Likewise.
> 
> --- gcc/config/s390/vector.md.jj      2025-01-24 17:37:48.987458141 +0100
> +++ gcc/config/s390/vector.md 2025-01-30 09:10:53.413542300 +0100
> @@ -2166,7 +2166,7 @@ (define_insn_and_split "*vec_cmpgt<mode>
>       (vec_duplicate:V2DI
>        (vec_select:DI
>         (match_dup 4)
> -       (parallel [(const_int 1)]))))
> +       (parallel [(const_int 0)]))))
>     (set (match_dup 0)
>       (subreg:<MODE> (match_dup 4) 0))]
>  {
> @@ -2198,7 +2198,7 @@ (define_insn_and_split "*vec_cmpgtu<mode
>       (vec_duplicate:V2DI
>        (vec_select:DI
>         (match_dup 4)
> -       (parallel [(const_int 1)]))))
> +       (parallel [(const_int 0)]))))
>     (set (match_dup 0)
>       (subreg:<MODE> (match_dup 4) 0))]
>  {
> --- gcc/testsuite/gcc.dg/pr118696.c.jj        2025-01-30 09:52:52.064679434 +0100
> +++ gcc/testsuite/gcc.dg/pr118696.c   2025-01-30 09:52:33.430936447 +0100
> @@ -0,0 +1,131 @@
> +/* PR target/118696 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#if __CHAR_BIT__ == 8
> +#if __SIZEOF_INT128__ == 16 && __SIZEOF_LONG_LONG__ == 8
> +#define D __int128
> +#define S long long
> +#define M 0x8000000000000000ULL
> +#define C 64
> +#elif __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4
> +#define D long long
> +#define S int
> +#define M 0x80000000U
> +#define C 32
> +#endif
> +#endif
> +
> +#ifdef D
> +static inline D
> +combine (unsigned S x, unsigned S y)
> +{
> +  return (unsigned D) x << C | y;
> +}
> +
> +__attribute__((noipa)) D
> +smin (D x, D y)
> +{
> +  return x < y ? x : y;
> +}
> +
> +__attribute__((noipa)) D
> +smax (D x, D y)
> +{
> +  return x > y ? x : y;
> +}
> +
> +__attribute__((noipa)) unsigned D
> +umin (unsigned D x, unsigned D y)
> +{
> +  return x < y ? x : y;
> +}
> +
> +__attribute__((noipa)) unsigned D
> +umax (unsigned D x, unsigned D y)
> +{
> +  return x > y ? x : y;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#ifdef D
> +  unsigned S vals[] = {
> +    0, 12, 42, M, M | 12, M | 42
> +  };
> +  unsigned char expected[] = {
> +    4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,2,2,2,2,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,
> +    2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,
> +    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,
> +    3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,4,3,3,3,3,3,3,
> +    3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,4,3,
> +    3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
> +    0,0,0,4,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
> +    0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,2,2,2,2,2,2,
> +    2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,2,2,
> +    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,
> +    3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
> +    0,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,
> +    0,0,0,0,0,0,4,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
> +    0,0,0,0,0,0,0,0,0,0,0,4,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
> +    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,2,2,2,2,2,2,2,2,2,2,
> +    2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,3,3,
> +    3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,4,
> +    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
> +    1,1,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,
> +    1,1,1,1,1,1,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,
> +    1,1,1,1,1,1,1,1,1,1,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,
> +    1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,
> +    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,4,3,3,3,3,3,3,3,
> +    3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,4,3,3,
> +    3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
> +    0,0,4,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
> +    0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
> +    1,1,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,
> +    1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,1,1,1,1,1,1,1,1,
> +    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,1,1,1,1,
> +    1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,
> +    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
> +    4,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
> +    0,0,0,0,0,4,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
> +    0,0,0,0,0,0,0,0,0,0,4,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
> +    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
> +  };
> +  int m = 0;
> +  for (int i = 0; i < 6; ++i)
> +    for (int j = 0; j < 6; ++j)
> +      for (int k = 0; k < 6; ++k)
> +     for (int l = 0; l < 6; ++l)
> +       {
> +         D a = combine (vals[i], vals[j]);
> +         D b = combine (vals[k], vals[l]);
> +         int r = 0;
> +         r |= (smin (a, b) == a) * 1;
> +         r |= (smin (a, b) == b) * 2;
> +         r |= (smax (a, b) == a) * 4;
> +         r |= (smax (a, b) == b) * 8;
> +         r |= (umin (a, b) == a) * 16;
> +         r |= (umin (a, b) == b) * 32;
> +         r |= (umax (a, b) == a) * 64;
> +         r |= (umax (a, b) == b) * 128;
> +         switch (r)
> +           {
> +           case 102: r = 0; break;
> +           case 105: r = 1; break;
> +           case 150: r = 2; break;
> +           case 153: r = 3; break;
> +           case 255: r = 4; break;
> +           default: __builtin_abort ();
> +           }
> +         if (r != expected[m++])
> +           __builtin_abort ();
> +       }
> +#endif
> +}
> --- gcc/testsuite/gcc.target/s390/vector/pr118696.c.jj        2025-01-30 09:55:53.134181945 +0100
> +++ gcc/testsuite/gcc.target/s390/vector/pr118696.c   2025-01-30 09:55:47.433260572 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/118696 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mzarch -march=z13" } */
> +
> +#include "../../../gcc.dg/pr118696.c"
> --- gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c.jj     2025-01-20 10:23:38.572601427 +0100
> +++ gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c        2025-01-30 16:58:51.853928339 +0100
> @@ -18,7 +18,7 @@ typedef __attribute__ ((vector_size (16)
>  **   vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
> -**   vrepg   %v[0-9]+,%v[0-9]+,1
> +**   vrepg   %v[0-9]+,%v[0-9]+,0
>  **   vsq     %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   br      %r14
> --- gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c.jj     2025-01-20 10:23:38.572601427 +0100
> +++ gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c        2025-01-30 16:59:06.279729311 +0100
> @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16)
>  **   vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
> -**   vrepg   %v[0-9]+,%v[0-9]+,1
> +**   vrepg   %v[0-9]+,%v[0-9]+,0
>  **   vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   br      %r14
>  */
> --- gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c.jj     2025-01-20 10:23:38.572601427 +0100
> +++ gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c        2025-01-30 16:59:00.054815191 +0100
> @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16)
>  **   vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
> -**   vrepg   %v[0-9]+,%v[0-9]+,1
> +**   vrepg   %v[0-9]+,%v[0-9]+,0
>  **   vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
>  **   br      %r14
>  */
> 
>       Jakub
> 

Reply via email to