On Tue, Feb 16, 2021 at 11:47:51AM +0100, Uros Bizjak wrote:
> > In this case the match_scratch wouldn't work, since CC_REGNUM is fixed.
> > But as you said on irc, there's peep2_regno_dead_p instead.
> >
> > Haven't tried it and so don't know whether it would work with only
> > one construct in the first […].  But it seems like it would be a better
> > fit, since peephole2 is designed to have up-to-date register information.
> >
> > Uros, does that sound reasonable, or is it a non-starter?
> 
> We have some splits after peephole2 on x86, e.g. "convert impossible
> pushes of immediate". There we try to get scratch register with
> define_peephole2 and if this fails (or when peephole2 pass is not ran
> at all with -O0), define_split after peephole2 splits the impossible
> insn to some less optimal sequence.
> 
> Another usage of late split pass is SSE partial register dependency
> stall that should run after allocated registers won't change anymore.
> 
> Please also note that peephole2 pass doesn't run with -O0.

True, but it seems these ix86_avoid_lea_for_add{,r} splitters are only
an optimization (mostly for the Atom/Bonell like CPUs).
And actually looking at it, there are 2 separate cases.

One is ix86_avoid_lea_for_add where I see no reason to check that
flags are dead, because it is used in splitters:
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (plus:SWI48 (match_operand:SWI48 1 "register_operand")
                    (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
...
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
where the splitter will only match if the flags reg is dead at that point.
So, I think we can just keep those as splitters and just don't call the
function that makes no sense in that case.

And then there is the ix86_avoid_lea_for_addr case, where indeed we
need to check if flags reg is dead at that point (i.e. if we can clobber
it).  But the define_peephole2 pass seems to have much better infrastructure
for that, the walking of the rest of the bb can be quadratic.

The splitting in that case when optimizing (nobody should care about code
performance microoptimizations with -O0) could happen during split2 and
split3 passes (when people don't disable sched2 or enable sel sched - the
latter only with a few days old trunk):

          NEXT_PASS (pass_split_after_reload);
          NEXT_PASS (pass_ree);
          NEXT_PASS (pass_compare_elim_after_reload);
          NEXT_PASS (pass_thread_prologue_and_epilogue);
          NEXT_PASS (pass_rtl_dse2);
          NEXT_PASS (pass_stack_adjustments);
          NEXT_PASS (pass_jump2);
          NEXT_PASS (pass_duplicate_computed_gotos);
          NEXT_PASS (pass_sched_fusion);
          NEXT_PASS (pass_peephole2);
          NEXT_PASS (pass_if_after_reload);
          NEXT_PASS (pass_regrename);
          NEXT_PASS (pass_cprop_hardreg);
          NEXT_PASS (pass_fast_rtl_dce);
          NEXT_PASS (pass_reorder_blocks);
          NEXT_PASS (pass_leaf_regs);
          NEXT_PASS (pass_split_before_sched2);
          NEXT_PASS (pass_sched2);

I think the passes between split2 and peephole2 won't really benefit
from the splitting being done earlier, and I doubt the passes between
peephole2 and split3 will often create new opportunities to split.
And during/after sched2 we weren't splitting anymore.

So, I think doing following would be best:

2021-02-16  Jakub Jelinek  <ja...@redhat.com>

        PR target/99104
        * config/i386/i386.c (ix86_ok_to_clobber_flags): Remove.
        (ix86_avoid_lea_for_add): Don't call ix86_ok_to_clobber_flags.
        (ix86_avoid_lea_for_addr): Likewise.  Adjust function comment.
        * config/i386/i386.m (*lea<mode>): Change from define_insn_and_split
        into define_insn.  Move the splitting to define_peephole2 and
        check there using peep2_regno_dead_p if FLAGS_REG is dead.

        * gcc.dg/pr99104.c: New test.

--- gcc/config/i386/i386.c.jj   2021-02-16 08:58:55.197890195 +0100
+++ gcc/config/i386/i386.c      2021-02-16 11:45:44.233204555 +0100
@@ -14968,38 +14968,6 @@ ix86_lea_outperforms (rtx_insn *insn, un
   return dist_define >= dist_use;
 }
 
-/* Return true if it is legal to clobber flags by INSN and
-   false otherwise.  */
-
-static bool
-ix86_ok_to_clobber_flags (rtx_insn *insn)
-{
-  basic_block bb = BLOCK_FOR_INSN (insn);
-  df_ref use;
-  bitmap live;
-
-  while (insn)
-    {
-      if (NONDEBUG_INSN_P (insn))
-       {
-         FOR_EACH_INSN_USE (use, insn)
-           if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
-             return false;
-
-         if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
-           return true;
-       }
-
-      if (insn == BB_END (bb))
-       break;
-
-      insn = NEXT_INSN (insn);
-    }
-
-  live = df_get_live_out(bb);
-  return !REGNO_REG_SET_P (live, FLAGS_REG);
-}
-
 /* Return true if we need to split op0 = op1 + op2 into a sequence of
    move and add to avoid AGU stalls.  */
 
@@ -15012,10 +14980,6 @@ ix86_avoid_lea_for_add (rtx_insn *insn,
   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
     return false;
 
-  /* Check it is correct to split here.  */
-  if (!ix86_ok_to_clobber_flags(insn))
-    return false;
-
   regno0 = true_regnum (operands[0]);
   regno1 = true_regnum (operands[1]);
   regno2 = true_regnum (operands[2]);
@@ -15051,7 +15015,7 @@ ix86_use_lea_for_mov (rtx_insn *insn, rt
 }
 
 /* Return true if we need to split lea into a sequence of
-   instructions to avoid AGU stalls. */
+   instructions to avoid AGU stalls during peephole2. */
 
 bool
 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
@@ -15071,10 +15035,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn,
          && REG_P (XEXP (operands[1], 0))))
     return false;
 
-  /* Check if it is OK to split here.  */
-  if (!ix86_ok_to_clobber_flags (insn))
-    return false;
-
   ok = ix86_decompose_address (operands[1], &parts);
   gcc_assert (ok);
 
--- gcc/config/i386/i386.md.jj  2021-01-13 10:12:07.036506101 +0100
+++ gcc/config/i386/i386.md     2021-02-16 11:43:27.666737790 +0100
@@ -5176,7 +5176,7 @@ (define_expand "floatunsdidf2"
 
 ;; Load effective address instructions
 
-(define_insn_and_split "*lea<mode>"
+(define_insn "*lea<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
        (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
   "ix86_hardreg_mov_ok (operands[0], operands[1])"
@@ -5189,7 +5189,19 @@ (define_insn_and_split "*lea<mode>"
   else 
     return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
 }
-  "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
+  [(set_attr "type" "lea")
+   (set (attr "mode")
+     (if_then_else
+       (match_operand 1 "SImode_address_operand")
+       (const_string "SI")
+       (const_string "<MODE>")))])
+
+(define_peephole2
+  [(set (match_operand:SWI48 0 "register_operand")
+       (match_operand:SWI48 1 "address_no_seg_operand"))]
+  "ix86_hardreg_mov_ok (operands[0], operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)
+   && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
   [(const_int 0)]
 {
   machine_mode mode = <MODE>mode;
@@ -5197,7 +5209,7 @@ (define_insn_and_split "*lea<mode>"
 
   /* ix86_avoid_lea_for_addr re-recognizes insn and may
      change operands[] array behind our back.  */
-  pat = PATTERN (curr_insn);
+  pat = PATTERN (peep2_next_insn (0));
 
   operands[0] = SET_DEST (pat);
   operands[1] = SET_SRC (pat);
@@ -5206,7 +5218,7 @@ (define_insn_and_split "*lea<mode>"
   if (SImode_address_operand (operands[1], VOIDmode))
     mode = SImode;
 
-  ix86_split_lea_for_addr (curr_insn, operands, mode);
+  ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
 
   /* Zero-extend return register to DImode for zero-extended addresses.  */
   if (mode != <MODE>mode)
@@ -5214,13 +5226,7 @@ (define_insn_and_split "*lea<mode>"
               (operands[0], gen_lowpart (mode, operands[0])));
 
   DONE;
-}
-  [(set_attr "type" "lea")
-   (set (attr "mode")
-     (if_then_else
-       (match_operand 1 "SImode_address_operand")
-       (const_string "SI")
-       (const_string "<MODE>")))])
+})
 
 ;; Add instructions
 
--- gcc/testsuite/gcc.dg/pr99104.c.jj   2021-02-16 11:47:08.793255200 +0100
+++ gcc/testsuite/gcc.dg/pr99104.c      2021-02-16 11:47:08.793255200 +0100
@@ -0,0 +1,15 @@
+/* PR target/99104 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -fsel-sched-pipelining -fselective-scheduling2 
-funroll-loops" } */
+
+__int128 a;
+int b;
+int foo (void);
+
+int __attribute__ ((simd))
+bar (void)
+{
+  a = ~a;
+  if (foo ())
+    b = 0;
+}


        Jakub

Reply via email to