On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > Don't assume that stack slots can only be accessed by stack or frame > > > registers. We first find all registers defined by stack or frame > > > registers. Then check memory accesses by such registers, including > > > stack and frame registers. > > > > > > gcc/ > > > > > > PR target/109780 > > > PR target/109093 > > > * config/i386/i386.cc (ix86_update_stack_alignment): New. > > > (ix86_find_all_reg_use_1): Likewise. > > > (ix86_find_all_reg_use): Likewise. > > > (ix86_find_max_used_stack_alignment): Also check memory accesses > > > from registers defined by stack or frame registers. > > > > > > gcc/testsuite/ > > > > > > PR target/109780 > > > PR target/109093 > > > * g++.target/i386/pr109780-1.C: New test. > > > * gcc.target/i386/pr109093-1.c: Likewise. > > > * gcc.target/i386/pr109780-1.c: Likewise. > > > * gcc.target/i386/pr109780-2.c: Likewise. > > > * gcc.target/i386/pr109780-3.c: Likewise. > > > > Some non-algorithmical changes below, otherwise LGTM. Please also get > > someone to review dataflow infrastructure usage, I am not well versed > > with it. > > > > +/* Helper function for ix86_find_all_reg_use. */ > > + > > +static void > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access, > > + auto_bitmap &worklist) > > +{ > > + rtx src = SET_SRC (set); > > + if (MEM_P (src)) > > > > Also reject assignment from CONST_SCALAR_INT? > > Done. > > > + return; > > + > > + rtx dest = SET_DEST (set); > > + if (!REG_P (dest)) > > + return; > > > > Can we switch these two so the test for REG_P (dest) will be first? We > > are not interested in anything that doesn't assign to a register. > > Done. > > > +/* Find all registers defined with REG. */ > > + > > +static void > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, > > + unsigned int reg, auto_bitmap &worklist) > > +{ > > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > > + ref != NULL; > > + ref = DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn = DF_REF_INSN (ref); > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X) > > > > + if (CALL_P (insn) || JUMP_P (insn)) > > + continue; > > > > And here remains only NONJUMP_INSN_P (X), so both above conditions > > could be substituted with: > > > > if (!NONJUMP_INSN_P (X)) > > continue; > > Done. > > > + > > + rtx set = single_set (insn); > > + if (set) > > + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist); > > + > > + rtx pat = PATTERN (insn); > > + if (GET_CODE (pat) != PARALLEL) > > + continue; > > + > > + for (int i = 0; i < XVECLEN (pat, 0); i++) > > + { > > + rtx exp = XVECEXP (pat, 0, i); > > + switch (GET_CODE (exp)) > > + { > > + case ASM_OPERANDS: > > + case CLOBBER: > > + case PREFETCH: > > + case USE: > > + break; > > + case UNSPEC: > > + case UNSPEC_VOLATILE: > > + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--) > > + { > > + rtx x = XVECEXP (exp, 0, j); > > + if (GET_CODE (x) == SET) > > + ix86_find_all_reg_use_1 (x, stack_slot_access, > > + worklist); > > + } > > + break; > > + case SET: > > + ix86_find_all_reg_use_1 (exp, stack_slot_access, > > + worklist); > > + break; > > + default: > > + debug_rtx (exp); > > > > Stray debug remaining? > > Removed. > > > + HARD_REG_SET stack_slot_access; > > + CLEAR_HARD_REG_SET (stack_slot_access); > > + > > + /* Stack slot can be accessed by stack pointer, frame pointer or > > + registers defined by stack pointer or frame pointer. */ > > + auto_bitmap worklist; > > > > Please put a line of vertical space here ... > > Done. > > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + STACK_POINTER_REGNUM); > > + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); > > > > ... here ... > > Done. > > > + if (frame_pointer_needed) > > + { > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + HARD_FRAME_POINTER_REGNUM); > > + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); > > + } > > > > ... here ... > > > > Done. > > > + unsigned int reg; > > > > ... here ... > > Done. > > > + do > > + { > > + reg = bitmap_clear_first_set_bit (worklist); > > + ix86_find_all_reg_use (stack_slot_access, reg, worklist); > > + } > > + while (!bitmap_empty_p (worklist)); > > + > > + hard_reg_set_iterator hrsi; > > > > ... here ... > > Done. > > > + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi) > > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > > + ref != NULL; > > + ref = DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn = DF_REF_INSN (ref); > > > > ... and here. > > > > Done. > > > + if (!NONDEBUG_INSN_P (insn)) > > > > !NONJUMP_INSN_P ? > > Changed. > > > + continue; > > > > Also some vertical space here. > > Done. > > > + note_stores (insn, ix86_update_stack_alignment, > > + &stack_alignment); > > + } > > } > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c > > b/gcc/testsuite/gcc.target/i386/pr109093-1.c > > new file mode 100644 > > index 00000000000..0459d1947f9 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c > > @@ -0,0 +1,39 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 > > -ftrivial-auto-var-init=zero -fno-stack-protector" } */ > > + > > > > Please use > > > > /* { dg-do run { target avx2_runtime } } */ > > > Done. > > > +{ > > + __builtin_cpu_init (); > > + > > + if (__builtin_cpu_supports ("avx2")) > > + run (); > > + > > > > And remove the above code from the test. > > > Done. > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c > > b/gcc/testsuite/gcc.target/i386/pr109780-3.c > > new file mode 100644 > > index 00000000000..f26fdd79af1 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c > > @@ -0,0 +1,52 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector > > -fno-stack-clash-protection" } */ > > + > > > > Also here. > > > > Done. > > Here is the v3 patch.
LGTM, modulo DF part that would need another pair of eyes. Thanks, Uros.