Hi!

The following testcase is miscompiled I believe starting with
PR112941 r14-6742.  That commit fixed the bitint-55.c testcase.
The m_first initialization for such conversion initializes 2 SSA_NAMEs,
one is PHI result on the loop (m_data[save_data_cnt]) and the other
(m_data[save_data_cnt+1]) is the argument of that PHI from the latch
edge initialized somewhere in the loop.  Both of these are used to
propagate sign extension (i.e. either 0 or all ones limb) from the
iteration with the sign bit of a narrower type to following iterations.
The bitint-55.c testcase was ICEing with invalid SSA forms as it was 
using unconditionally the PHI argument SSA_NAME even in places which
weren't dominated by that.  And the code which was touched is about
handling constant idx, so if e.g. there are nested casts and the
outer one does conditional code based on index comparison with
a particular constant index.
In the following testcase there are 2 nested casts, one from signed
_BitInt(129) to unsigned _BitInt(255) and the outer from unsigned
_BitInt(255) to unsigned _BitInt(256).  The m_upward_2limbs case which
is used for handling mergeable arithmetics (like +-|&^ and casts etc.)
one loop iteration handles 2 limbs, the first half the even ones, the
second half the odd ones.
And for these 2 conversions, the special one for the inner conversion
on x86_64 is with index 2 where the sign bit of _BitInt(129) is present,
while for the outer one index 3 where we need to mask off the most
significant bit.
The r15-6742 change started using m_data[save_data_cnt] for all constant
indexes if it is still inside of the loop (and it is sign extension).
But that doesn't work correctly for the case where the inner conversion
produces the sign extension limb in the loop for an even index and
the outer conversion needs to special case the immediately next conversion,
because in that case using the PHI result will see still 0 there rather
than the updated value from the handling of previous limb.
So the following patch special cases this and uses the other SSA_NAME.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Commented IL, trying to lower
  _1 = (unsigned _BitInt(255)) y_4(D);
  _2 = (unsigned _BitInt(256)) _1;
  _3 = _2 + x_5(D);
  <retval> = _3;
we were emitting
  <bb 3> [local count: 1073741824]:
  # _8 = PHI <0(2), _9(12)>     // This is the limb index
  # _10 = PHI <0(2), _11(12)>   // Sign extension limb from inner cast (0 or 
~0UL)
  # _22 = PHI <0(2), _23(12)>   // Overflow bit from addition of previous limb
  if (_8 <= 2)
    goto <bb 4>; [80.00%]
  else
    goto <bb 7>; [20.00%]

  <bb 4> [local count: 1073741824]:
  if (_8 == 2)
    goto <bb 6>; [20.00%]
  else
    goto <bb 5>; [80.00%]
  
  <bb 5> [local count: 1073741824]:
  _12 = VIEW_CONVERT_EXPR<unsigned long[3]>(y)[_8];     // Full limbs in y
  goto <bb 7>; [100.00%]
  
  <bb 6> [local count: 214748360]:
  _13 = MEM <unsigned long> [(_BitInt(129) *)&y + 16B]; // y[2] which
  _14 = (<unnamed-signed:1>) _13;                       // needs to be
  _15 = (unsigned long) _14;                            // sign extended
  _16 = (signed long) _15;                              // to full
  _17 = _16 >> 63;                                      // limb
  _18 = (unsigned long) _17;
  
  <bb 7> [local count: 1073741824]:
  # _19 = PHI <_12(5), _10(3), _15(6)>  // Limb to add for result of casts
  # _20 = PHI <0(5), _10(3), _18(6)>    // Sign extension limb from previous 
limb
  _11 = _20;                            // PHI _10 argument above
  _21 = VIEW_CONVERT_EXPR<unsigned long[4]>(x)[_8];
  _24 = .UADDC (_19, _21, _22);
  _25 = IMAGPART_EXPR <_24>;
  _26 = REALPART_EXPR <_24>;
  VIEW_CONVERT_EXPR<unsigned long[4]>(<retval>)[_8] = _26;
  _27 = _8 + 1;
  if (_27 == 3)                 // For the outer cast limb 3 is special
    goto <bb 11>; [20.00%]
  else
    goto <bb 8>; [80.00%]

  <bb 8> [local count: 1073741824]:
  if (_27 < 2)
    goto <bb 9>; [80.00%]
  else
    goto <bb 10>; [20.00%]

  <bb 9> [local count: 1073741824]:
  _28 = VIEW_CONVERT_EXPR<unsigned long[3]>(y)[_27];    // These are used in 
full

  <bb 10> [local count: 1073741824]:
  # _29 = PHI <_28(9), _11(8)>
  goto <bb 12>; [100.00%]

  <bb 11> [local count: 214748360]:
// And HERE is the actual bug.  Using _10 for idx 3 will mean it is always
// zero there and doesn't contain the _18 value propagated to it.
// It should be
// _30 = (<unnamed-unsigned:63>) _11;
// Now if the outer conversion had special iteration say 5, we could
// have used _10 fine here, by that time it already propagates through
// the PHI.
  _30 = (<unnamed-unsigned:63>) _10;
  _31 = (unsigned long) _30;

  <bb 12> [local count: 1073741824]:
  # _32 = PHI <_29(10), _31(11)>
  _33 = VIEW_CONVERT_EXPR<unsigned long[4]>(x)[_27];
  _34 = .UADDC (_32, _33, _25);
  _23 = IMAGPART_EXPR <_34>;
  _35 = REALPART_EXPR <_34>;
  VIEW_CONVERT_EXPR<unsigned long[4]>(<retval>)[_27] = _35;
  _9 = _8 + 2;
  if (_9 != 4)
    goto <bb 3>; [0.05%]
  else
    goto <bb 13>; [99.95%]

2025-04-10  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/119707
        * gimple-lower-bitint.cc (bitint_large_huge::handle_cast): Only use
        m_data[save_data_cnt] instead of m_data[save_data_cnt + 1] if
        idx is odd and equal to low + 1.  Remember tree_to_uhwi (idx) in
        a temporary instead of calling the function multiple times.

        * gcc.dg/torture/bitint-76.c: New test.

--- gcc/gimple-lower-bitint.cc.jj       2025-04-08 14:08:51.000000000 +0200
+++ gcc/gimple-lower-bitint.cc  2025-04-10 16:18:53.315303307 +0200
@@ -1547,14 +1547,15 @@ bitint_large_huge::handle_cast (tree lhs
        }
       else
        {
-         if (tree_to_uhwi (idx) < low)
+         unsigned tidx = tree_to_uhwi (idx);
+         if (tidx < low)
            {
              t = handle_operand (rhs1, idx);
              if (m_first)
                m_data[save_data_cnt + 2]
                  = build_int_cst (NULL_TREE, m_data_cnt);
            }
-         else if (tree_to_uhwi (idx) < high)
+         else if (tidx < high)
            {
              t = handle_operand (rhs1, size_int (low));
              if (m_first)
@@ -1587,7 +1588,9 @@ bitint_large_huge::handle_cast (tree lhs
                m_data_cnt = tree_to_uhwi (m_data[save_data_cnt + 2]);
              if (TYPE_UNSIGNED (rhs_type))
                t = build_zero_cst (m_limb_type);
-             else if (m_bb && m_data[save_data_cnt])
+             else if (m_bb
+                      && m_data[save_data_cnt]
+                      && ((tidx & 1) == 0 || tidx != low + 1))
                t = m_data[save_data_cnt];
              else
                t = m_data[save_data_cnt + 1];
--- gcc/testsuite/gcc.dg/torture/bitint-76.c.jj 2025-04-10 16:24:01.677066011 
+0200
+++ gcc/testsuite/gcc.dg/torture/bitint-76.c    2025-04-10 16:23:35.743422373 
+0200
@@ -0,0 +1,19 @@
+/* PR tree-optimization/119707 */
+/* { dg-do run { target bitint } } */
+
+#if __BITINT_MAXWIDTH__ >= 256
+__attribute__((noipa)) unsigned _BitInt(256)
+foo (unsigned _BitInt(256) x, _BitInt(129) y)
+{
+  return x + (unsigned _BitInt(255)) y;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 256
+  if (foo (0, -1) != 
0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffuwb)
+    __builtin_abort ();
+#endif
+}

        Jakub

Reply via email to