On Fri, Feb 14, 2025 at 2:11 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > Don't assume that stack slots can only be accessed by stack or frame > > > > registers. We first find all registers defined by stack or frame > > > > registers. Then check memory accesses by such registers, including > > > > stack and frame registers. > > > > > > > > gcc/ > > > > > > > > PR target/109780 > > > > PR target/109093 > > > > * config/i386/i386.cc (ix86_update_stack_alignment): New. > > > > (ix86_find_all_reg_use_1): Likewise. > > > > (ix86_find_all_reg_use): Likewise. > > > > (ix86_find_max_used_stack_alignment): Also check memory accesses > > > > from registers defined by stack or frame registers. > > > > > > > > gcc/testsuite/ > > > > > > > > PR target/109780 > > > > PR target/109093 > > > > * g++.target/i386/pr109780-1.C: New test. > > > > * gcc.target/i386/pr109093-1.c: Likewise. > > > > * gcc.target/i386/pr109780-1.c: Likewise. > > > > * gcc.target/i386/pr109780-2.c: Likewise. > > > > * gcc.target/i386/pr109780-3.c: Likewise. > > > > > > Some non-algorithmical changes below, otherwise LGTM. Please also get > > > someone to review dataflow infrastructure usage, I am not well versed > > > with it. > > > > > > +/* Helper function for ix86_find_all_reg_use. */ > > > + > > > +static void > > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access, > > > + auto_bitmap &worklist) > > > +{ > > > + rtx src = SET_SRC (set); > > > + if (MEM_P (src)) > > > > > > Also reject assignment from CONST_SCALAR_INT? > > > > Done. > > > > > + return; > > > + > > > + rtx dest = SET_DEST (set); > > > + if (!REG_P (dest)) > > > + return; > > > > > > Can we switch these two so the test for REG_P (dest) will be first? We > > > are not interested in anything that doesn't assign to a register. > > > > Done. > > > > > +/* Find all registers defined with REG. */ > > > + > > > +static void > > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, > > > + unsigned int reg, auto_bitmap &worklist) > > > +{ > > > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > > > + ref != NULL; > > > + ref = DF_REF_NEXT_REG (ref)) > > > + { > > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > > + continue; > > > + > > > + rtx_insn *insn = DF_REF_INSN (ref); > > > + if (!NONDEBUG_INSN_P (insn)) > > > + continue; > > > > > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X) > > > > > > + if (CALL_P (insn) || JUMP_P (insn)) > > > + continue; > > > > > > And here remains only NONJUMP_INSN_P (X), so both above conditions > > > could be substituted with: > > > > > > if (!NONJUMP_INSN_P (X)) > > > continue; > > > > Done. > > > > > + > > > + rtx set = single_set (insn); > > > + if (set) > > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist); > > > + > > > + rtx pat = PATTERN (insn); > > > + if (GET_CODE (pat) != PARALLEL) > > > + continue; > > > + > > > + for (int i = 0; i < XVECLEN (pat, 0); i++) > > > + { > > > + rtx exp = XVECEXP (pat, 0, i); > > > + switch (GET_CODE (exp)) > > > + { > > > + case ASM_OPERANDS: > > > + case CLOBBER: > > > + case PREFETCH: > > > + case USE: > > > + break; > > > + case UNSPEC: > > > + case UNSPEC_VOLATILE: > > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--) > > > + { > > > + rtx x = XVECEXP (exp, 0, j); > > > + if (GET_CODE (x) == SET) > > > + ix86_find_all_reg_use_1 (x, stack_slot_access, > > > + worklist); > > > + } > > > + break; > > > + case SET: > > > + ix86_find_all_reg_use_1 (exp, stack_slot_access, > > > + worklist); > > > + break; > > > + default: > > > + debug_rtx (exp); > > > > > > Stray debug remaining?
Some more looking in the above loop... A comment in rtlanal.cc says that: --q-- /* All the other possibilities never store and can use a normal rtx walk. This includes: - USE - TRAP_IF - PREFETCH - UNSPEC - UNSPEC_VOLATILE. */ --/q-- So, based on the above, it is possible to considerably simplify the loop above, all the way to: --cut here-- for (int i = 0; i < XVECLEN (pat, 0); i++) { rtx exp = XVECEXP (pat, 0, i); if (GET_CODE (exp) == SET) ix86_find_all_reg_use_1 (exp, stack_slot_access, worklist); } --cut here-- We are only interested in SETs that set a register from a stack/frame pointer, not in their general uses. Attached patch implements this simplification. Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index fafd4a511a3..560e6525b56 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8538,31 +8538,9 @@ ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, for (int i = 0; i < XVECLEN (pat, 0); i++) { rtx exp = XVECEXP (pat, 0, i); - switch (GET_CODE (exp)) - { - case ASM_OPERANDS: - case CLOBBER: - case PREFETCH: - case USE: - break; - case UNSPEC: - case UNSPEC_VOLATILE: - for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--) - { - rtx x = XVECEXP (exp, 0, j); - if (GET_CODE (x) == SET) - ix86_find_all_reg_use_1 (x, stack_slot_access, - worklist); - } - break; - case SET: - ix86_find_all_reg_use_1 (exp, stack_slot_access, - worklist); - break; - default: - gcc_unreachable (); - break; - } + + if (GET_CODE (exp) == SET) + ix86_find_all_reg_use_1 (exp, stack_slot_access, worklist); } } }