On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > Don't assume that stack slots can only be accessed by stack or frame > registers. We first find all registers defined by stack or frame > registers. Then check memory accesses by such registers, including > stack and frame registers. > > gcc/ > > PR target/109780 > PR target/109093 > * config/i386/i386.cc (ix86_update_stack_alignment): New. > (ix86_find_all_reg_use_1): Likewise. > (ix86_find_all_reg_use): Likewise. > (ix86_find_max_used_stack_alignment): Also check memory accesses > from registers defined by stack or frame registers. > > gcc/testsuite/ > > PR target/109780 > PR target/109093 > * g++.target/i386/pr109780-1.C: New test. > * gcc.target/i386/pr109093-1.c: Likewise. > * gcc.target/i386/pr109780-1.c: Likewise. > * gcc.target/i386/pr109780-2.c: Likewise. > * gcc.target/i386/pr109780-3.c: Likewise.
Some non-algorithmical changes below, otherwise LGTM. Please also get someone to review dataflow infrastructure usage, I am not well versed with it. +/* Helper function for ix86_find_all_reg_use. */ + +static void +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access, + auto_bitmap &worklist) +{ + rtx src = SET_SRC (set); + if (MEM_P (src)) Also reject assignment from CONST_SCALAR_INT? + return; + + rtx dest = SET_DEST (set); + if (!REG_P (dest)) + return; Can we switch these two so the test for REG_P (dest) will be first? We are not interested in anything that doesn't assign to a register. +/* Find all registers defined with REG. */ + +static void +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, + unsigned int reg, auto_bitmap &worklist) +{ + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + if (!NONDEBUG_INSN_P (insn)) + continue; Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X) + if (CALL_P (insn) || JUMP_P (insn)) + continue; And here remains only NONJUMP_INSN_P (X), so both above conditions could be substituted with: if (!NONJUMP_INSN_P (X)) continue; + + rtx set = single_set (insn); + if (set) + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist); + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx exp = XVECEXP (pat, 0, i); + switch (GET_CODE (exp)) + { + case ASM_OPERANDS: + case CLOBBER: + case PREFETCH: + case USE: + break; + case UNSPEC: + case UNSPEC_VOLATILE: + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--) + { + rtx x = XVECEXP (exp, 0, j); + if (GET_CODE (x) == SET) + ix86_find_all_reg_use_1 (x, stack_slot_access, + worklist); + } + break; + case SET: + ix86_find_all_reg_use_1 (exp, stack_slot_access, + worklist); + break; + default: + debug_rtx (exp); Stray debug remaining? + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + auto_bitmap worklist; Please put a line of vertical space here ... + add_to_hard_reg_set (&stack_slot_access, Pmode, + STACK_POINTER_REGNUM); + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); ... here ... + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); + } ... here ... + unsigned int reg; ... here ... + do + { + reg = bitmap_clear_first_set_bit (worklist); + ix86_find_all_reg_use (stack_slot_access, reg, worklist); + } + while (!bitmap_empty_p (worklist)); + + hard_reg_set_iterator hrsi; ... here ... + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi) + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); ... and here. + if (!NONDEBUG_INSN_P (insn)) !NONJUMP_INSN_P ? + continue; Also some vertical space here. + note_stores (insn, ix86_update_stack_alignment, + &stack_alignment); + } } diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c b/gcc/testsuite/gcc.target/i386/pr109093-1.c new file mode 100644 index 00000000000..0459d1947f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */ + Please use /* { dg-do run { target avx2_runtime } } */ +{ + __builtin_cpu_init (); + + if (__builtin_cpu_supports ("avx2")) + run (); + And remove the above code from the test. diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c b/gcc/testsuite/gcc.target/i386/pr109780-3.c new file mode 100644 index 00000000000..f26fdd79af1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */ + Also here. Thanks, Uros.