On Mon, Jun 21, 2021 at 12:28 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Mon, Jun 21, 2021 at 12:14:09PM +0200, Richard Biener wrote:
> > > But we could do what I've done in
> > > r11-7694-gd55ce33a34a8e33d17285228b32cf1e564241a70
> > > - have int ix86_last_zero_store_uid;
> > > set to INSN_UID of the last store emitted by the peephole2s and
> > > then check that INSN_UID against the var.
> >
> > Hmm, or have reg_nonzero_bits_for_peephole2 () and maintain
> > that somehow ... (conservatively drop it when a SET is seen).
>
> Maintaining something in peephole2 wouldn't be that easy because
> of peephole2's rolling window, plus it would need to be done
> in the generic code even when nothing but a single target in a specific case
> needs that.
>
> The following seems to work.
>
> 2021-06-21  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR target/11877
> 	* config/i386/i386-protos.h (ix86_last_zero_store_uid): Declare.
> 	* config/i386/i386-expand.c (ix86_last_zero_store_uid): New variable.
> 	* config/i386/i386.c (ix86_expand_prologue): Clear it.
> 	* config/i386/i386.md (peephole2s for 1/2/4 stores of const0_rtx):
> 	Remove "" from match_operand.  Emit new insns using emit_move_insn and
> 	set ix86_last_zero_store_uid to INSN_UID of the last store.
> 	Add peephole2s for 1/2/4 stores of const0_rtx following previous
> 	successful peep2s.
LGTM.

Thanks, Uros.

>
> --- gcc/config/i386/i386-protos.h.jj	2021-06-21 11:59:16.769693735 +0200
> +++ gcc/config/i386/i386-protos.h	2021-06-21 12:01:47.875691930 +0200
> @@ -111,6 +111,7 @@ extern bool ix86_use_lea_for_mov (rtx_in
>  extern bool ix86_avoid_lea_for_addr (rtx_insn *, rtx[]);
>  extern void ix86_split_lea_for_addr (rtx_insn *, rtx[], machine_mode);
>  extern bool ix86_lea_for_add_ok (rtx_insn *, rtx[]);
> +extern int ix86_last_zero_store_uid;
>  extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
>  extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
>  extern bool ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn);
> --- gcc/config/i386/i386-expand.c.jj	2021-06-21 09:39:21.604488082 +0200
> +++ gcc/config/i386/i386-expand.c	2021-06-21 12:21:33.017977951 +0200
> @@ -1316,6 +1316,9 @@ find_nearest_reg_def (rtx_insn *insn, in
>    return false;
>  }
>
> +/* INSN_UID of the last insn emitted by zero store peephole2s.  */
> +int ix86_last_zero_store_uid;
> +
>  /* Split lea instructions into a sequence of instructions
>     which are executed on ALU to avoid AGU stalls.
>     It is assumed that it is allowed to clobber flags register
> --- gcc/config/i386/i386.c.jj	2021-06-21 09:39:21.622487840 +0200
> +++ gcc/config/i386/i386.c	2021-06-21 12:06:54.049634337 +0200
> @@ -8196,6 +8196,7 @@ ix86_expand_prologue (void)
>    bool save_stub_call_needed;
>    rtx static_chain = NULL_RTX;
>
> +  ix86_last_zero_store_uid = 0;
>    if (ix86_function_naked (current_function_decl))
>      {
>        if (flag_stack_usage_info)
> --- gcc/config/i386/i386.md.jj	2021-06-21 09:42:04.086303699 +0200
> +++ gcc/config/i386/i386.md	2021-06-21 12:14:10.411847549 +0200
> @@ -19360,37 +19360,96 @@ (define_peephole2
>  ;; When optimizing for size, zeroing memory should use a register.
>  (define_peephole2
>    [(match_scratch:SWI48 0 "r")
> -   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
> -   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))
> -   (set (match_operand:SWI48 3 "memory_operand" "") (const_int 0))
> -   (set (match_operand:SWI48 4 "memory_operand" "") (const_int 0))]
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
>    "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
> -  [(set (match_dup 1) (match_dup 0))
> -   (set (match_dup 2) (match_dup 0))
> -   (set (match_dup 3) (match_dup 0))
> -   (set (match_dup 4) (match_dup 0))]
> +  [(const_int 0)]
>  {
>    ix86_expand_clear (operands[0]);
> +  emit_move_insn (operands[1], operands[0]);
> +  emit_move_insn (operands[2], operands[0]);
> +  emit_move_insn (operands[3], operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[4], operands[0]));
> +  DONE;
>  })
>
>  (define_peephole2
>    [(match_scratch:SWI48 0 "r")
> -   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
> -   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))]
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
>    "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
> -  [(set (match_dup 1) (match_dup 0))
> -   (set (match_dup 2) (match_dup 0))]
> +  [(const_int 0)]
>  {
>    ix86_expand_clear (operands[0]);
> +  emit_move_insn (operands[1], operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[2], operands[0]));
> +  DONE;
>  })
>
>  (define_peephole2
>    [(match_scratch:SWI48 0 "r")
> -   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))]
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
>    "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
> -  [(set (match_dup 1) (match_dup 0))]
> +  [(const_int 0)]
>  {
>    ix86_expand_clear (operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[1], operands[0]));
> +  DONE;
> +})
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48 5 "memory_operand")
> +	(match_operand:SWI48 0 "general_reg_operand"))
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
> +  "optimize_insn_for_size_p ()
> +   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
> +  [(const_int 0)]
> +{
> +  emit_move_insn (operands[5], operands[0]);
> +  emit_move_insn (operands[1], operands[0]);
> +  emit_move_insn (operands[2], operands[0]);
> +  emit_move_insn (operands[3], operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[4], operands[0]));
> +  DONE;
> +})
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48 3 "memory_operand")
> +	(match_operand:SWI48 0 "general_reg_operand"))
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
> +   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
> +  "optimize_insn_for_size_p ()
> +   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
> +  [(const_int 0)]
> +{
> +  emit_move_insn (operands[3], operands[0]);
> +  emit_move_insn (operands[1], operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[2], operands[0]));
> +  DONE;
> +})
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48 2 "memory_operand")
> +	(match_operand:SWI48 0 "general_reg_operand"))
> +   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
> +  "optimize_insn_for_size_p ()
> +   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
> +  [(const_int 0)]
> +{
> +  emit_move_insn (operands[2], operands[0]);
> +  ix86_last_zero_store_uid
> +    = INSN_UID (emit_move_insn (operands[1], operands[0]));
> +  DONE;
>  })
>
>  ;; Reload dislikes loading constants directly into class_likely_spilled
>
>
> 	Jakub
>
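[Editor's note: a minimal illustrative test case of the kind of sequence these peephole2s target. The struct, field names, and function name below are made up for illustration and are not taken from the patch or its testsuite. The intent described in the thread is that, at -Os, the compiler clears one register once (via ix86_expand_clear, i.e. an xor) and stores that register to each field, instead of emitting a separate store of an immediate zero per field; the patterns guarded by ix86_last_zero_store_uid let a later peephole2 window continue an already-started run of such stores even though the xor itself lies outside the current window.]

/* Illustrative example only (not from the patch).  Each assignment below
   expands to a store of (const_int 0) to memory; with -Os the peephole2s
   above are meant to rewrite such runs into one register clear followed by
   register stores.  */

struct s
{
  long a, b, c, d, e, f;
};

void
clear_s (struct s *p)
{
  p->a = 0;
  p->b = 0;
  p->c = 0;
  p->d = 0;
  p->e = 0;
  p->f = 0;
}

[The size win comes from the immediate form (e.g. movq $0, (%rdi)) encoding a full 32-bit zero immediate, while the register form (movq %rax, (%rdi)) does not, and the xor that zeroes the register is only two bytes; for two or more stores the register form is smaller.]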