I was looking into why I couldn't trivially move cris-elf to
"use init_array". It appeared that it wasn't the hooks into
that machinery that went wrong, but that a compiler bug is
plaguing __libc_init_array. It's been there since at least
4.7-era, hiding under the covers of the __init_array being empty
(everything being in .init).
Looking into it, it seems that IRA is treating this insn:
(call_insn 16 14 17 4 (parallel [
(call (mem:QI (mem/f:SI (post_inc:SI (reg:SI 27 [ ivtmp.7 ])) [1
MEM[base: _8, offset: 0B]+0 S4 A8]) [0 *_1 S1 A8])
(const_int 0 [0]))
(clobber (reg:SI 16 srp))
]) "t.c":7:5 220 {*expanded_call_non_v32}
(expr_list:REG_INC (reg:SI 27 [ ivtmp.7 ])
(expr_list:REG_CALL_DECL (nil)
(nil)))
(nil))
...as if the read-part of the post-increment happens before the
call, and the write-part to happen after the call, supposedly
with the value magically kept unclobbered or treated as some
kind of return-value. Looking around, it seems only the VAX
port would also be affected; grepping around I see no other port
having a call instruction capable of loading a value indirectly,
with a side-effect.
Now, I'm ok with deliberately forbidding autoinc and other
side-effect constructs on call insns during register allocation
and will do the documentation legwork of that part (and the more
involved target-fixing) if there's consensus for that, but it
seems that for IRA the fix is as simple as follows.
LRA seems to have the same issue, but I have no way to reproduce
it there; I'll just have to watch out when I move the port to
LRA. I don't know if reload is affected, but I believe
autoincdec doesn't count as an output reload. (Please correct
me if I'm wrong! An output-reload on a call insn is not
allowed, says a comment in find_reloads, but AFAICS that's still
undocumented.)
So, is this ok? Regtested on cris-elf and x86_64-linux-gnu
(though the latter uses LRA). Note that this does *not* cause
the return-value for f3 and f4 in the test-case to be allocated
a call-saved register after the value.
gcc:
* lra-lives.c (process_bb_node_lives): Consider defs
for a call insn to be die before the call, not after.
--- gcc/ira-lives.c.orig Fri Apr 19 03:22:15 2019
+++ gcc/ira-lives.c Sun May 19 07:31:33 2019
@@ -1241,11 +1241,6 @@ process_bb_node_lives (ira_loop_tree_nod
preprocess_constraints (insn);
process_single_reg_class_operands (false, freq);
- /* See which defined values die here. */
- FOR_EACH_INSN_DEF (def, insn)
- if (!call_p || !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER))
- mark_ref_dead (def);
-
if (call_p)
{
/* Try to find a SET in the CALL_INSN_FUNCTION_USAGE, and from
@@ -1308,6 +1303,17 @@ process_bb_node_lives (ira_loop_tree_nod
ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a)++;
}
}
+
+ /* See which defined values die here. Note that we include
+ the call insn in the lifetimes of these values, so we don't
+ mistakenly consider, for e.g. an addressing mode with a
+ side-effect like a post-increment fetching the address,
+ that the use happens before the call, and the def to happen
+ after the call: we believe both to happen before the actual
+ call. (We don't handle return-values here.) */
+ FOR_EACH_INSN_DEF (def, insn)
+ if (!call_p || !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER))
+ mark_ref_dead (def);
make_early_clobber_and_input_conflicts ();
For the test-suite, a few variants on the same theme, where the
functions called are found to clobber the pre-patch chosen
call-clobbered register:
gcc/testsuite:
* gcc.dg/torture/pr90553.c: New test.
--- /dev/null Tue Oct 29 15:57:07 2002
+++ gcc/testsuite/gcc.dg/torture/pr90553.c Mon May 20 05:26:43 2019
@@ -0,0 +1,128 @@
+/* { dg-do run } */
+
+__attribute__((__noipa__))
+void f1(int x, void (*p1 []) (int, int))
+{
+ int i;
+ for (i = 0; i < x; i++)
+ p1[i](42, 666);
+}
+
+int z1_called = 0;
+int w1_called = 0;
+
+__attribute__((__noipa__))
+void z1(int a, int b)
+{
+ if (w1_called || z1_called)
+ __builtin_abort();
+ z1_called++;
+}
+
+__attribute__((__noipa__))
+void w1(int a, int b)
+{
+ if (w1_called || !z1_called)
+ __builtin_abort();
+ w1_called++;
+}
+
+int z2_called = 0;
+int w2_called = 0;
+
+__attribute__((__noipa__))
+void z2(void)
+{
+ if (w2_called || z2_called)
+ __builtin_abort();
+ z2_called++;
+}
+
+__attribute__((__noipa__))
+void w2(void)
+{
+ if (w2_called || !z2_called)
+ __builtin_abort();
+ w2_called++;
+}
+
+void (*p2 []) () = { w2, z2 };
+
+__attribute__((__noipa__))
+void f2(int x)
+{
+ void (**q) (void) = p2 + x;
+ int i;
+ for (i = 0; i < x; i++)
+ (*(--q))();
+}
+
+__attribute__((__noipa__))
+void f3(int x, int (*p3 []) (int))
+{
+ int i;
+ int next = x;
+ for (i = 0; i < x; i++)
+ next = p3[i](next);
+}
+
+int z3_called = 0;
+int w3_called = 0;
+
+__attribute__((__noipa__))
+int z3(int a)
+{
+ if (w3_called || z3_called || a != 2)
+ __builtin_abort();
+ z3_called++;
+ return 42;
+}
+
+__attribute__((__noipa__))
+int w3(int a)
+{
+ if (w3_called || !z3_called || a != 42)
+ __builtin_abort();
+ w3_called++;
+ return 4096;
+}
+
+int (*p4 []) (int) = { z3, w3 };
+
+__attribute__((__noipa__))
+void f4(int x)
+{
+ int (**q) (int) = p4;
+ int (**r) (int) = p4 + x;
+
+ int next = x;
+ for (; q < r; q++)
+ next = (*q)(next);
+}
+
+int main(void)
+{
+ static int (*p3 []) (int) = { z3, w3 };
+
+ static void (*p1 []) (int, int) = { z1, w1 };
+
+ f1(2, p1);
+ if (z1_called != 1 || w1_called != 1)
+ __builtin_abort();
+
+ f2(2);
+ if (z2_called != 1 || w2_called != 1)
+ __builtin_abort();
+
+ f3(2, p3);
+ if (z3_called != 1 || w3_called != 1)
+ __builtin_abort();
+
+ z3_called = 0;
+ w3_called = 0;
+ f4(2);
+ if (z3_called != 1 || w3_called != 1)
+ __builtin_abort();
+
+ __builtin_exit(0);
+}
brgds, H-P