On Mon, Jun 21, 2021 at 12:14:09PM +0200, Richard Biener wrote:
> > But we could do what I've done in
> > r11-7694-gd55ce33a34a8e33d17285228b32cf1e564241a70
> > - have int ix86_last_zero_store_uid;
> > set to INSN_UID of the last store emitted by the peephole2s and
> > then check that INSN_UID against the var.
> 
> Hmm, or have reg_nonzero_bits_for_peephole2 () and maintain
> that somehow ... (conservatively drop it when a SET is seen).

Maintaining something in peephole2 wouldn't be that easy because
of peephole2's rolling window, plus it would need to be done
in the generic code even though only a single target needs it,
and only in one specific case.

The following seems to work.

2021-06-21  Jakub Jelinek  <ja...@redhat.com>

        PR target/11877
        * config/i386/i386-protos.h (ix86_last_zero_store_uid): Declare.
        * config/i386/i386-expand.c (ix86_last_zero_store_uid): New variable.
        * config/i386/i386.c (ix86_expand_prologue): Clear it.
        * config/i386/i386.md (peephole2s for 1/2/4 stores of const0_rtx):
        Remove "" from match_operand.  Emit new insns using emit_move_insn and
        set ix86_last_zero_store_uid to INSN_UID of the last store.
        Add peephole2s for 1/2/4 stores of const0_rtx following previous
        successful peep2s.

--- gcc/config/i386/i386-protos.h.jj    2021-06-21 11:59:16.769693735 +0200
+++ gcc/config/i386/i386-protos.h       2021-06-21 12:01:47.875691930 +0200
@@ -111,6 +111,7 @@ extern bool ix86_use_lea_for_mov (rtx_in
 extern bool ix86_avoid_lea_for_addr (rtx_insn *, rtx[]);
 extern void ix86_split_lea_for_addr (rtx_insn *, rtx[], machine_mode);
 extern bool ix86_lea_for_add_ok (rtx_insn *, rtx[]);
+extern int ix86_last_zero_store_uid;
 extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
 extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
 extern bool ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn);
--- gcc/config/i386/i386-expand.c.jj    2021-06-21 09:39:21.604488082 +0200
+++ gcc/config/i386/i386-expand.c       2021-06-21 12:21:33.017977951 +0200
@@ -1316,6 +1316,9 @@ find_nearest_reg_def (rtx_insn *insn, in
   return false;
 }
 
+/* INSN_UID of the last insn emitted by zero store peephole2s.  */
+int ix86_last_zero_store_uid;
+
 /* Split lea instructions into a sequence of instructions
    which are executed on ALU to avoid AGU stalls.
    It is assumed that it is allowed to clobber flags register
--- gcc/config/i386/i386.c.jj   2021-06-21 09:39:21.622487840 +0200
+++ gcc/config/i386/i386.c      2021-06-21 12:06:54.049634337 +0200
@@ -8196,6 +8196,7 @@ ix86_expand_prologue (void)
   bool save_stub_call_needed;
   rtx static_chain = NULL_RTX;
 
+  ix86_last_zero_store_uid = 0;
   if (ix86_function_naked (current_function_decl))
     {
       if (flag_stack_usage_info)
--- gcc/config/i386/i386.md.jj  2021-06-21 09:42:04.086303699 +0200
+++ gcc/config/i386/i386.md     2021-06-21 12:14:10.411847549 +0200
@@ -19360,37 +19360,96 @@ (define_peephole2
 ;; When optimizing for size, zeroing memory should use a register.
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 3 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 4 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))
-   (set (match_dup 2) (match_dup 0))
-   (set (match_dup 3) (match_dup 0))
-   (set (match_dup 4) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  emit_move_insn (operands[2], operands[0]);
+  emit_move_insn (operands[3], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[4], operands[0]));
+  DONE;
 })
 
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))
-   (set (match_dup 2) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[2], operands[0]));
+  DONE;
 })
 
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[1], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 5 "memory_operand")
+       (match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[5], operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  emit_move_insn (operands[2], operands[0]);
+  emit_move_insn (operands[3], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[4], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 3 "memory_operand")
+       (match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[3], operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[2], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 2 "memory_operand")
+       (match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[2], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[1], operands[0]));
+  DONE;
 })
 
 ;; Reload dislikes loading constants directly into class_likely_spilled


        Jakub

Reply via email to