https://gcc.gnu.org/g:3e6e885beb7097c5c5ee2c48ddb3b0e61f3a1fc7

commit r16-3124-g3e6e885beb7097c5c5ee2c48ddb3b0e61f3a1fc7
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Mon Aug 11 09:24:10 2025 +0100

    simplify-rtx: Distribute some non-narrowing subregs [PR121306]
    
    In g:965564eafb721f8000013a3112f1bba8d8fae32b I'd added code
    to try distributing non-widening subregs through logic ops,
    in cases where that would eliminate a term of the logic op.
    
    For "reasons", this indirectly caused combine to generate:
    
      (set (zero_extract:SI (reg/v:SI 101 [ a ])
              (const_int 8 [0x8])
              (const_int 8 [0x8]))
          (not:SI (sign_extract:SI (reg:SI 107 [ b ])
                  (const_int 8 [0x8])
                  (const_int 8 [0x8]))))
    
    instead of:
    
      (set (zero_extract:SI (reg/v:SI 101 [ a ])
              (const_int 8 [0x8])
              (const_int 8 [0x8]))
          (subreg:SI (not:QI (subreg:QI (sign_extract:SI (reg:SI 107 [ b ])
                          (const_int 8 [0x8])
                          (const_int 8 [0x8])) 0)) 0))
    
    for some tests that were intended to match x86's *one_cmplqi_ext<mode>_1
    (see g:a58d770fa1d17ead3c38417b299cce3f19f392db).  However, other more
    direct ways of generating the pattern continued to have the unsimplified
    (subreg:SI (not:QI (subreg:QI (...:SI ...)))) structure, since that
    structure wasn't the focus of the original patch.
    
    This patch tries to tackle that simplification head-on.  It's another
    case of distributing subregs, but this time for non-narrowing rather
    than non-widening subregs.  We already do the same distribution for
    word_mode:
    
      /* Attempt to simplify WORD_MODE SUBREGs of bitwise expressions.  */
      if (outermode == word_mode
          && (GET_CODE (op) == IOR || GET_CODE (op) == XOR || GET_CODE (op) == AND)
          && SCALAR_INT_MODE_P (innermode))
        {
          rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
          rtx op1 = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
          if (op0 && op1)
            return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
        }
    
    which g:0340177d54d08b6375391ba164a878e6a596275e extended to NOT.
    For word_mode, there are (reasonably) no restrictions on the inner
    mode other than that it is an integer.  Doing word_mode logic ops
    should be at least as efficient as subword logic ops (if the target
    provides subword ops at all).  And word_mode logic ops should be
    cheaper than multi-word logic ops.
    
    But here we need the distribution for SImode rather than word_mode
    (DImode).  The patch therefore extends the word_mode distributions
    to non-narrowing subregs in which the two modes occupy the same
    number of words.  This should hopefully be relatively conservative.
    It prevents the new rule from moving away from word_mode and from trying
    to convert (say) a QImode subreg of a word_mode AND into a QImode AND.
    It should be suitable for both CISCy and RISCy targets, including
    those that define WORD_REGISTER_OPERATIONS.
    
    The patch also fixes some overlong lines in related code.
    
    gcc/
            PR rtl-optimization/121306
            * simplify-rtx.cc (simplify_context::simplify_subreg): Distribute
            non-narrowing integer-to-integer subregs through logic ops,
            in a similar way to the existing word_mode handling.

Diff:
---
 gcc/simplify-rtx.cc | 102 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 21 deletions(-)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index ee9c048fd274..8f0f16c865d1 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -8333,27 +8333,56 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
        return XEXP (XEXP (op, 0), 0);
     }
 
-  /* Attempt to simplify WORD_MODE SUBREGs of bitwise expressions.  */
-  if (outermode == word_mode
-      && (GET_CODE (op) == IOR || GET_CODE (op) == XOR || GET_CODE (op) == AND)
-      && SCALAR_INT_MODE_P (innermode))
+  auto distribute_subreg = [&](rtx op)
     {
-      rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
-      rtx op1 = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
-      if (op0 && op1)
-       return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
-    }
+      return simplify_subreg (outermode, op, innermode, byte);
+    };
 
-  /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression.  */
-  if (outermode == word_mode && GET_CODE (op) == NOT
-      && SCALAR_INT_MODE_P (innermode))
-    {
-      rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
-      if (op0)
-       return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode);
-    }
+  /* Try distributing the subreg through logic operations, if that
+     leads to all subexpressions being simplified.  For example,
+     distributing the outer subreg in:
+
+       (subreg:SI (not:QI (subreg:QI (reg:SI X) <lowpart>)) 0)
+
+     gives:
 
+       (not:SI (reg:SI X))
+
+     This should be a win if the outermode is word_mode, since logical
+     operations on word_mode should (a) be no more expensive than logical
+     operations on subword modes and (b) are likely to be cheaper than
+     logical operations on multiword modes.
+
+     Otherwise, handle the case where the subreg is non-narrowing and does
+     not change the number of words.  The non-narrowing condition ensures
+     that we don't convert word_mode operations to subword operations.  */
   scalar_int_mode int_outermode, int_innermode;
+  if (is_a <scalar_int_mode> (outermode, &int_outermode)
+      && is_a <scalar_int_mode> (innermode, &int_innermode)
+      && (outermode == word_mode
+         || ((GET_MODE_PRECISION (int_outermode)
+              >= GET_MODE_PRECISION (int_innermode))
+             && (CEIL (GET_MODE_SIZE (int_outermode), UNITS_PER_WORD)
+                 <= CEIL (GET_MODE_SIZE (int_innermode), UNITS_PER_WORD)))))
+    switch (GET_CODE (op))
+      {
+      case NOT:
+       if (rtx op0 = distribute_subreg (XEXP (op, 0)))
+         return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode);
+       break;
+
+      case AND:
+      case IOR:
+      case XOR:
+       if (rtx op0 = distribute_subreg (XEXP (op, 0)))
+         if (rtx op1 = distribute_subreg (XEXP (op, 1)))
+           return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
+       break;
+
+      default:
+       break;
+      }
+
   if (is_a <scalar_int_mode> (outermode, &int_outermode)
       && is_a <scalar_int_mode> (innermode, &int_innermode)
       && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode)))
@@ -8407,8 +8436,8 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
     return simplify_gen_relational (GET_CODE (op), outermode, innermode,
                                    XEXP (op, 0), XEXP (op, 1));
 
-  /* Distribute non-paradoxical subregs through logic ops in cases where one term
-     disappears.
+  /* Distribute non-paradoxical subregs through logic ops in cases where
+     one term disappears.
 
      (subreg:M1 (and:M2 X C1)) -> (subreg:M1 X)
      (subreg:M1 (ior:M2 X C1)) -> (subreg:M1 C1)
@@ -8425,7 +8454,7 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
       && (GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR)
       && CONSTANT_P (XEXP (op, 1)))
     {
-      rtx op1_subreg = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
+      rtx op1_subreg = distribute_subreg (XEXP (op, 1));
       if (op1_subreg == CONSTM1_RTX (outermode))
        {
          if (GET_CODE (op) == IOR)
@@ -8439,7 +8468,7 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
       if (op1_subreg == CONST0_RTX (outermode))
        return (GET_CODE (op) == AND
                ? op1_subreg
-               : simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte));
+               : distribute_subreg (XEXP (op, 0)));
     }
 
   return NULL_RTX;
@@ -8746,6 +8775,37 @@ test_scalar_int_ext_ops (machine_mode bmode, machine_mode smode)
                                                      bsubreg, inv_smask),
                                 bmode),
                 sreg);
+
+  if (known_le (GET_MODE_PRECISION (bmode), BITS_PER_WORD))
+    {
+      rtx breg1 = make_test_reg (bmode);
+      rtx breg2 = make_test_reg (bmode);
+      rtx ssubreg1 = lowpart_subreg (smode, breg1, bmode);
+      rtx ssubreg2 = lowpart_subreg (smode, breg2, bmode);
+      rtx not_1 = simplify_gen_unary (NOT, smode, ssubreg1, smode);
+      rtx and_12 = simplify_gen_binary (AND, smode, ssubreg1, ssubreg2);
+      rtx ior_12 = simplify_gen_binary (IOR, smode, ssubreg1, ssubreg2);
+      rtx xor_12 = simplify_gen_binary (XOR, smode, ssubreg1, ssubreg2);
+      rtx and_n12 = simplify_gen_binary (AND, smode, not_1, ssubreg2);
+      rtx ior_n12 = simplify_gen_binary (IOR, smode, not_1, ssubreg2);
+      rtx xor_12_c = simplify_gen_binary (XOR, smode, xor_12, const1_rtx);
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, not_1, smode),
+                    gen_rtx_NOT (bmode, breg1));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, and_12, smode),
+                    gen_rtx_AND (bmode, breg1, breg2));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, ior_12, smode),
+                    gen_rtx_IOR (bmode, breg1, breg2));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, xor_12, smode),
+                    gen_rtx_XOR (bmode, breg1, breg2));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, and_n12, smode),
+                    gen_rtx_AND (bmode, gen_rtx_NOT (bmode, breg1), breg2));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, ior_n12, smode),
+                    gen_rtx_IOR (bmode, gen_rtx_NOT (bmode, breg1), breg2));
+      ASSERT_RTX_EQ (lowpart_subreg (bmode, xor_12_c, smode),
+                    gen_rtx_XOR (bmode,
+                                 gen_rtx_XOR (bmode, breg1, breg2),
+                                 const1_rtx));
+    }
 }
 
 /* Verify more simplifications of integer extension/truncation.

Reply via email to