Since we may delete stores that are found to be redundant in
postreload cse, we need cselib to invalidate argument stores at calls,
and to that end we need CALL_INSN_FUNCTION_USAGE to mention all MEM
stack space that may be legitimately modified by a const/pure callee,
i.e., all arguments passed to it on the stack.

When ACCUMULATE_OUTGOING_ARGS, each on-stack argument gets its own
usage information, but when it's not, each argument is pushed
incrementally, without precomputed stack slots.

Since we only mentioned such precomputed stack slots in
CALL_INSN_FUNCTION_USAGE, non-ACCUMULATE_OUTGOING_ARGS configurations
miss the stack usage data, and cselib fails to invalidate the stores.

Stores in such slots are anonymous, and they often invalidate other
anonymous slots, even part of the same object, but as the testcase
demonstrates, we may occasionally be unlucky that consecutive calls
have the stores to multi-word objects reordered by scheduling in such
a way that the last store for the first call survives the call in the
cselib tables, and then it is found to be redundant with the first
store for the subsequent call, as in the testcase.

So, if we haven't preallocated outgoing arguments for a call (which
would give us preassigned stack slots), and we have used any stack
space, add function call usage covering the entire stack range where
arguments were stored.

Regstrapped on x86_64-linux-gnu.  Ok to install?


for  gcc/ChangeLog

        PR rtl-optimization/122947
        * calls.cc (expand_call): Add stack function usage in
        non-ACCUMULATE_OUTGOING_ARGS configurations.

for  gcc/testsuite/ChangeLog

        PR rtl-optimization/122947
        * gcc.dg/pr122947.c: New.
---
 gcc/calls.cc                    |   29 ++++++++++++++++++++++++-
 gcc/testsuite/gcc.dg/pr122947.c |   45 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr122947.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index bb8a6d09f8257..85d6ea4134373 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -3579,7 +3579,8 @@ expand_call (tree exp, rtx target, int ignore)
                      && check_sibcall_argument_overlap (before_arg,
                                                         &args[i], true)))
                sibcall_failure = true;
-             }
+             gcc_checking_assert (!args[i].stack || argblock);
+           }
 
          if (args[i].stack)
            call_fusage
@@ -3676,6 +3677,32 @@ expand_call (tree exp, rtx target, int ignore)
          && !must_preallocate && reg_parm_stack_space > 0)
        anti_adjust_stack (GEN_INT (reg_parm_stack_space));
 
+      /* Cover pushed arguments with call usage, so that cselib knows to
+        invalidate the stores in them at the call insn.  */
+      if (pass == 1 && !argblock
+         && (maybe_ne (adjusted_args_size.constant, 0)
+             || adjusted_args_size.var))
+       {
+         rtx addr = virtual_outgoing_args_rtx;
+         poly_int64 size = adjusted_args_size.constant;
+         if (!STACK_GROWS_DOWNWARD)
+           {
+             if (adjusted_args_size.var)
+               /* ??? We can't compute the exact base address.  */
+               addr = gen_rtx_PLUS (GET_MODE (addr), addr,
+                                    gen_rtx_SCRATCH (GET_MODE (addr)));
+             else
+               addr = plus_constant (GET_MODE (addr), addr, -size);
+           }
+         rtx fu = gen_rtx_MEM (BLKmode, addr);
+         if (adjusted_args_size.var == 0)
+           set_mem_size (fu, size);
+         call_fusage
+           = gen_rtx_EXPR_LIST (BLKmode,
+                                gen_rtx_USE (VOIDmode, fu),
+                                call_fusage);
+       }
+
       /* Pass the function the address in which to return a
         structure value.  */
       if (pass != 0 && structure_value_addr && ! structure_value_addr_parm)
diff --git a/gcc/testsuite/gcc.dg/pr122947.c b/gcc/testsuite/gcc.dg/pr122947.c
new file mode 100644
index 0000000000000..945a61a4c177f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr122947.c
@@ -0,0 +1,45 @@
+/* PR rtl-optimization/122947 based on PR 117239 */
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+/* { dg-additional-options "-fschedule-insns -mno-accumulate-outgoing-args" { 
target x86 } } */
+
+int c = 1;
+
+struct A {
+  int e, f, g, h;
+  short i;
+  int j;
+};
+
+void
+bar (int x, struct A y)
+{
+  if (y.j == 1)
+    c = 0;
+}
+
+/* Simplest pure way to force baz's x.j back to memory.  
+   So simple that IPA "inlines" it, so we disable IPA and mark as pure.  */
+int __attribute__ ((noipa, pure))
+bad (struct A const *x)
+{
+  return x->j;
+}
+
+int
+baz (struct A x)
+{
+  x.j = 0;
+  return bad (&x);
+}
+
+int
+main ()
+{
+  struct A k = { 0, 0, 0, 0, 0, 1 };
+  int d = baz (k);
+  bar (0, k);
+  if (c + d != 0)
+    __builtin_abort ();
+  return 0;
+}


-- 
Alexandre Oliva, happy hacker            https://blog.lx.oliva.nom.br/
Free Software Activist     FSFLA co-founder     GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity.
Excluding neuro-others for not behaving ""normal"" is *not* inclusive!

Reply via email to