On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers.  We first find all registers defined by stack or frame
> registers.  Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
> PR target/109780
> PR target/109093
> * config/i386/i386.cc (ix86_update_stack_alignment): New.
> (ix86_find_all_reg_use_1): Likewise.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> PR target/109093
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109093-1.c: Likewise.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> * gcc.target/i386/pr109780-3.c: Likewise.

Some non-algorithmical changes below, otherwise LGTM. Please also get
someone to review dataflow infrastructure usage, I am not well versed
with it.

+/* Helper function for ix86_find_all_reg_use.  */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+             auto_bitmap &worklist)
+{
+  rtx src = SET_SRC (set);
+  if (MEM_P (src))

Also reject assignment from CONST_SCALAR_INT?

+    return;
+
+  rtx dest = SET_DEST (set);
+  if (!REG_P (dest))
+    return;

Can we switch these two so the test for REG_P (dest) will be first? We
are not interested in anything that doesn't assign to a register.

+/* Find all registers defined with REG.  */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+               unsigned int reg, auto_bitmap &worklist)
+{
+  for (df_ref ref = DF_REG_USE_CHAIN (reg);
+       ref != NULL;
+       ref = DF_REF_NEXT_REG (ref))
+    {
+      if (DF_REF_IS_ARTIFICIAL (ref))
+    continue;
+
+      rtx_insn *insn = DF_REF_INSN (ref);
+      if (!NONDEBUG_INSN_P (insn))
+    continue;

Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)

+      if (CALL_P (insn) || JUMP_P (insn))
+    continue;

And here remains only NONJUMP_INSN_P (X), so both above conditions
could be substituted with:

if (!NONJUMP_INSN_P (X))
  continue;
+
+      rtx set = single_set (insn);
+      if (set)
+    ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+      rtx pat = PATTERN (insn);
+      if (GET_CODE (pat) != PARALLEL)
+    continue;
+
+      for (int i = 0; i < XVECLEN (pat, 0); i++)
+    {
+      rtx exp = XVECEXP (pat, 0, i);
+      switch (GET_CODE (exp))
+        {
+        case ASM_OPERANDS:
+        case CLOBBER:
+        case PREFETCH:
+        case USE:
+          break;
+        case UNSPEC:
+        case UNSPEC_VOLATILE:
+          for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
+        {
+          rtx x = XVECEXP (exp, 0, j);
+          if (GET_CODE (x) == SET)
+            ix86_find_all_reg_use_1 (x, stack_slot_access,
+                         worklist);
+        }
+          break;
+        case SET:
+          ix86_find_all_reg_use_1 (exp, stack_slot_access,
+                       worklist);
+          break;
+        default:
+          debug_rtx (exp);

Stray debug remaining?

+  HARD_REG_SET stack_slot_access;
+  CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* Stack slot can be accessed by stack pointer, frame pointer or
+     registers defined by stack pointer or frame pointer.  */
+  auto_bitmap worklist;

Please put a line of vertical space here ...

+  add_to_hard_reg_set (&stack_slot_access, Pmode,
+               STACK_POINTER_REGNUM);
+  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

... here ...

+  if (frame_pointer_needed)
+    {
+      add_to_hard_reg_set (&stack_slot_access, Pmode,
+               HARD_FRAME_POINTER_REGNUM);
+      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+    }

... here ...

+  unsigned int reg;

... here ...

+  do
+    {
+      reg = bitmap_clear_first_set_bit (worklist);
+      ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+    }
+  while (!bitmap_empty_p (worklist));
+
+  hard_reg_set_iterator hrsi;

... here ...

+  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+    for (df_ref ref = DF_REG_USE_CHAIN (reg);
+     ref != NULL;
+     ref = DF_REF_NEXT_REG (ref))
+      {
+    if (DF_REF_IS_ARTIFICIAL (ref))
+      continue;
+
+    rtx_insn *insn = DF_REF_INSN (ref);

... and here.

+    if (!NONDEBUG_INSN_P (insn))

!NONJUMP_INSN_P ?

+      continue;

Also some vertical space here.

+    note_stores (insn, ix86_update_stack_alignment,
+             &stack_alignment);
+      }
 }

diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..0459d1947f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run }  */
+/* { dg-options "-O2 -mavx2 -mtune=znver1
-ftrivial-auto-var-init=zero -fno-stack-protector" } */
+

Please use

/* { dg-do run { target avx2_runtime } } */

+{
+  __builtin_cpu_init ();
+
+  if (__builtin_cpu_supports ("avx2"))
+    run ();
+

And remove the above code from the test.

diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..f26fdd79af1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run }  */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
-fno-stack-clash-protection" } */
+

Also here.

Thanks,
Uros.

Reply via email to