On Mon, Aug 4, 2025 at 3:28 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Mon, Aug 4, 2025 at 2:04 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > On Mon, Aug 4, 2025 at 8:50 AM Richard Sandiford
> > <richard.sandif...@arm.com> wrote:
> > >
> > > Uros Bizjak <ubiz...@gmail.com> writes:
> > > > On Sat, Aug 2, 2025 at 8:56 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > >>
> > > >> On Fri, Aug 1, 2025 at 10:32 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> > > >> >
> > > >> > On Sat, Aug 2, 2025 at 3:22 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > >> > >
> > > >> > > After
> > > >> > >
> > > >> > > commit 965564eafb721f8000013a3112f1bba8d8fae32b
> > > >> > > Author: Richard Sandiford <richard.sandif...@arm.com>
> > > >> > > Date:   Tue Jul 29 15:58:34 2025 +0100
> > > >> > >
> > > >> > >     simplify-rtx: Simplify subregs of logic ops
> > > >> > >
> > > >> > > combine generates
> > > >> > >
> > > >> > > (set (zero_extract:SI (reg/v:SI 101 [ a ])
> > > >> > >         (const_int 8 [0x8])
> > > >> > >         (const_int 8 [0x8]))
> > > >> > >     (not:SI (sign_extract:SI (reg:SI 107 [ b ])
> > > >> > >             (const_int 8 [0x8])
> > > >> > >             (const_int 8 [0x8]))))
> > > >> > >
> > > >> > > instead of
> > > >> > >
> > > >> > > (set (zero_extract:SI (reg/v:SI 101 [ a ])
> > > >> > >         (const_int 8 [0x8])
> > > >> > >         (const_int 8 [0x8]))
> > > > >> > >     (subreg:SI (not:QI (subreg:QI (sign_extract:SI (reg:SI 107 [ b ])
> > > >> > >                     (const_int 8 [0x8])
> > > >> > >                     (const_int 8 [0x8])) 0)) 0))
> > > >> > >
> > > >> > > Add *one_cmplqi_ext<mode>_2 to support the new pattern.
> > > >> > >
> > > >> > >         PR target/121306
> > > >> > >         * config/i386/i386.md (*one_cmplqi_ext<mode>_2): New.
> > > >> >
> > > >> > Why not just change the old pattern? I'd expect that the old form is
> > > >> > now obsolete.
> > > >> >
> > > >>
> > > >> *one_cmplqi_ext<mode>_1 is still needed.  Otherwise combine will
> > > >> fail to match this instruction:
> > > >>
> > > >> (set (zero_extract:SI (reg/v:SI 102 [ a ])
> > > >>         (const_int 8 [0x8])
> > > >>         (const_int 8 [0x8]))
> > > >>     (subreg:SI (not:QI (subreg:QI (reg:SI 105 [ _2 ]) 0)) 0))
> > > >
> > > > Do you perhaps have a testcase that still shows this combination?
> > > > Perhaps the author of the simplification (CC'd) would be interested in
> > > > this missing case.
> > > >
> > > > > I was under the impression that the new simplification should always
> > > > > trigger; there is no point in having it if the backend still has to
> > > > > provide both simplified and non-simplified patterns.
> > > >
> > > > There are also other similar logic patterns in i386.md that would have
> > > > to be amended.
> > >
> > > Sorry, I hadn't realised that there were still unfixed regressions
> > > from that patch.  I suppose if we wanted to avoid two patterns here,
> > > we'd need to extend the pre-existing word_mode folds to support
> > > subword modes too (for !WORD_REGISTER_OPERATIONS).  The attached
> > > untested patch does that, but I expect it would have similar
> > > knock-on effects.  I'll give it a spin overnight on x86 anyway
> > > just to see what happens.
> >
> > Yes, it fixes:
> >
> > FAIL: gcc.target/i386/pr82524.c scan-assembler-not mov[sz]bl
> > FAIL: gcc.target/i386/pr82524.c scan-assembler [ \t]notb
> >
> > together with the enclosed patch.
>
> 64-bit libgo failed to compile
>
> during RTL pass: late_combine
> /export/gnu/import/git/gitlab/x86-gcc/libgo/go/runtime/mpallocbits.go:
> In function ‘runtime.pageBits.clear’:
> /export/gnu/import/git/gitlab/x86-gcc/libgo/go/runtime/mpallocbits.go:67:1:
> internal compiler error: in simplify_subreg, at simplify-rtx.cc:8085
>    67 | func (b *pageBits) clear(i uint) {
>       | ^
> 0x2276abf internal_error(char const*, ...)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/diagnostic-global-context.cc:534
> 0x669387 fancy_abort(char const*, int, char const*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/diagnostics/context.cc:1640
> 0x4d0a70 simplify_context::simplify_subreg(machine_mode, rtx_def*,
> machine_mode, poly_int<1u, unsigned long>)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/simplify-rtx.cc:8085
> 0xdbc955 simplify_context::simplify_subreg(machine_mode, rtx_def*,
> machine_mode, poly_int<1u, unsigned long>)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/simplify-rtx.cc:8349
> 0xd38441 insn_propagation::apply_to_rvalue_1(rtx_def**)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.cc:1224
> 0xd37e8b insn_propagation::apply_to_rvalue_1(rtx_def**)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.cc:1166
> 0xd38942 insn_propagation::apply_to_pattern_1(rtx_def**)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.cc:1400
> 0xd389ef insn_propagation::apply_to_pattern(rtx_def**)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.cc:1444
> 0x20c441d substitute_nondebug_use
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:198
> 0x20c441d substitute_nondebug_uses
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:271
> 0x20c5b5d run
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:440
> 0x20c5b5d combine_into_uses
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:690
> 0x20c669c execute
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:718
> 0x20c669c execute
> /export/gnu/import/git/gitlab/x86-gcc/gcc/late-combine.cc:771
> Please submit a full bug report, with preprocessed source (by using
> -freport-bug).
> Please include the complete backtrace with any bug report.
> See <https://gcc.gnu.org/bugs/> for instructions.
>
> (gdb) call debug (op)
> (and:SI (reg:DI 113 [ i ])
>     (const_int 63 [0x3f]))
> (gdb)

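The enclosed patch works.  It avoids the ICE above by handling a first
operand whose mode differs from innermode (as in the AND shown by gdb)
before calling simplify_subreg on it.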

> > Thanks.
> >
> > > Richard
> > >
> > > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> > > index c723a07f06b..0374fec1d9c 100644
> > > --- a/gcc/simplify-rtx.cc
> > > +++ b/gcc/simplify-rtx.cc
> > > @@ -8333,27 +8333,33 @@ simplify_context::simplify_subreg (machine_mode 
> > > outermode, rtx op,
> > >         return XEXP (XEXP (op, 0), 0);
> > >      }
> > >
> > > -  /* Attempt to simplify WORD_MODE SUBREGs of bitwise expressions.  */
> > > -  if (outermode == word_mode
> > > > -      && (GET_CODE (op) == IOR || GET_CODE (op) == XOR || GET_CODE (op) == AND)
> > > -      && SCALAR_INT_MODE_P (innermode))
> > > -    {
> > > > -      rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
> > > > -      rtx op1 = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
> > > -      if (op0 && op1)
> > > -       return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
> > > -    }
> > > -
> > > > -  /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression.  */
> > > -  if (outermode == word_mode && GET_CODE (op) == NOT
> > > -      && SCALAR_INT_MODE_P (innermode))
> > > +  /* Attempt to simplify WORD_MODE and sub-WORD_MODE SUBREGs of bitwise
> > > +     expressions.  */
> > > +  scalar_int_mode int_outermode;
> > > +  if (is_a<scalar_int_mode> (outermode, &int_outermode)
> > > +      && (WORD_REGISTER_OPERATIONS
> > > +         ? int_outermode == word_mode
> > > +         : GET_MODE_PRECISION (int_outermode) <= BITS_PER_WORD)
> > > +      && SCALAR_INT_MODE_P (innermode)
> > > +      && (GET_CODE (op) == IOR
> > > +         || GET_CODE (op) == XOR
> > > +         || GET_CODE (op) == AND
> > > +         || GET_CODE (op) == NOT))
> > >      {
> > > >        rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
> > > >        if (op0)
> > > > -       return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode);
> > > +       {
> > > +         if (GET_CODE (op) == NOT)
> > > +           return simplify_gen_unary (GET_CODE (op), outermode,
> > > +                                      op0, outermode);
> > > +
> > > > +         rtx op1 = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
> > > > +         if (op1)
> > > > +           return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
> > > +       }
> > >      }
> > >
> > > -  scalar_int_mode int_outermode, int_innermode;
> > > +  scalar_int_mode int_innermode;
> > >    if (is_a <scalar_int_mode> (outermode, &int_outermode)
> > >        && is_a <scalar_int_mode> (innermode, &int_innermode)
> > > >        && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode)))
> > > --
> > > 2.43.0
> > >
> >
> >
> > --
> > H.J.
>
>
>
> --
> H.J.



-- 
H.J.
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index c723a07f06b..91038a4c9d2 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -8333,27 +8333,44 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
 	return XEXP (XEXP (op, 0), 0);
     }
 
-  /* Attempt to simplify WORD_MODE SUBREGs of bitwise expressions.  */
-  if (outermode == word_mode
-      && (GET_CODE (op) == IOR || GET_CODE (op) == XOR || GET_CODE (op) == AND)
-      && SCALAR_INT_MODE_P (innermode))
-    {
-      rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
-      rtx op1 = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
-      if (op0 && op1)
-	return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
-    }
-
-  /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression.  */
-  if (outermode == word_mode && GET_CODE (op) == NOT
-      && SCALAR_INT_MODE_P (innermode))
+  /* Attempt to simplify WORD_MODE and sub-WORD_MODE SUBREGs of bitwise
+     expressions.  */
+  scalar_int_mode int_outermode;
+  if (is_a<scalar_int_mode> (outermode, &int_outermode)
+      && (WORD_REGISTER_OPERATIONS
+	  ? int_outermode == word_mode
+	  : GET_MODE_PRECISION (int_outermode) <= BITS_PER_WORD)
+      && SCALAR_INT_MODE_P (innermode)
+      && (GET_CODE (op) == IOR
+	  || GET_CODE (op) == XOR
+	  || GET_CODE (op) == AND
+	  || GET_CODE (op) == NOT))
     {
-      rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
-      if (op0)
-	return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode);
+      rtx op0 = XEXP (op, 0);
+      if (GET_MODE (op0) != innermode)
+	{
+	  if (validate_subreg (innermode, GET_MODE (op0), op0, 0))
+	    op0 = gen_rtx_SUBREG (innermode, op0, 0);
+	}
+      if (GET_MODE (op0) == innermode)
+	{
+	  op0 = simplify_subreg (outermode, op0, innermode, byte);
+	  if (op0)
+	    {
+	      if (GET_CODE (op) == NOT)
+		return simplify_gen_unary (GET_CODE (op), outermode,
+					   op0, outermode);
+
+	      rtx op1 = simplify_subreg (outermode, XEXP (op, 1),
+					 innermode, byte);
+	      if (op1)
+		return simplify_gen_binary (GET_CODE (op), outermode,
+					    op0, op1);
+	    }
+	}
     }
 
-  scalar_int_mode int_outermode, int_innermode;
+  scalar_int_mode int_innermode;
   if (is_a <scalar_int_mode> (outermode, &int_outermode)
       && is_a <scalar_int_mode> (innermode, &int_innermode)
       && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode)))

Reply via email to