When we expand our MMA built-ins into gimple, we (me!) erroneously reused the accumulator memory reference for both the source input value as well as the destination output value. This led to a tree sharing error. The solution is to create separate memory references for the input and output values.
I'll note that the old build1 call is not actually needed. I believe I started using that first and then noticed that didn't work in call cases, so I used build_simple_mem_ref in the other cases. I should have noticed that build_simple_mem_ref worked for all cases. :-) This passed bootstrap and regtesting on powerpc64le-linux with no regressions. Raji has also done some runtime testing of her MMA tests using this patch and they all showed no regressions either. Ok for trunk and GCC 10 after a couple of days of burn in? Peter gcc/ PR target/96808 * config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Do not reuse accumulator memory reference for source and destination accesses. gcc/testsuite/ PR target/96808 * gcc.target/powerpc/pr96808.c: New test. diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 26388762c5f..b6b45687aae 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -11471,12 +11471,8 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) /* Convert this built-in into an internal version that uses pass-by-value arguments. The internal built-in follows immediately after this one. */ new_decl = rs6000_builtin_decls[fncode + 1]; - tree lhs, mem, op[MAX_MMA_OPERANDS]; + tree lhs, op[MAX_MMA_OPERANDS]; tree acc = gimple_call_arg (stmt, 0); - if (TREE_CODE (acc) == PARM_DECL) - mem = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (acc)), acc); - else - mem = build_simple_mem_ref (acc); push_gimplify_context (true); if ((attr & RS6000_BTC_QUAD) != 0) @@ -11486,7 +11482,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) op[0] = make_ssa_name (vector_quad_type_node); for (unsigned i = 1; i < nopnds; i++) op[i] = gimple_call_arg (stmt, i); - gimplify_assign (op[0], mem, &new_seq); + gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq); } else { @@ -11536,7 +11532,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) lhs = make_ssa_name (vector_quad_type_node); gimple_call_set_lhs (new_call, lhs); gimple_seq_add_stmt (&new_seq, new_call); - gimplify_assign (mem, lhs, &new_seq); + gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq); pop_gimplify_context (NULL); gsi_replace_with_seq (gsi, new_seq, true); diff --git a/gcc/testsuite/gcc.target/powerpc/pr96808.c b/gcc/testsuite/gcc.target/powerpc/pr96808.c new file mode 100644 index 00000000000..2d44bd51b20 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96808.c @@ -0,0 +1,59 @@ +/* PR target/96808 */ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +/* Verify we do not ICE on the tests below. */ + +void +old_ok (__vector_quad *dst, vector unsigned char vc) +{ + __vector_quad vq; + __builtin_mma_xxsetaccz(&vq); + __builtin_mma_xvf32gerpp(&vq, vc, vc); + *dst = vq; +} + +void +test0 (__vector_quad *dst, vector unsigned char vc) +{ + __vector_quad vq[2]; + __builtin_mma_xxsetaccz(&vq[1]); + __builtin_mma_xvf32gerpp(&vq[1], vc, vc); + *dst = vq[1]; +} + +void +test1 (__vector_quad *dst, vector unsigned char vc) +{ + __vector_quad vq[2][2]; + __builtin_mma_xxsetaccz(&vq[1][1]); + __builtin_mma_xvf32gerpp(&vq[1][1], vc, vc); + *dst = vq[1][1]; +} + +void +test2 (__vector_quad *dst, vector unsigned char vc) +{ + struct { + __vector_quad dummy; + __vector_quad acc; + } vq; + __builtin_mma_xxsetaccz(&vq.acc); + __builtin_mma_xvf32gerpp(&vq.acc, vc, vc); + *dst = vq.acc; +} + +void +test3 (__vector_quad *dst, vector unsigned char vc) +{ + __builtin_mma_xxsetaccz(&dst[1]); + __builtin_mma_xvf32gerpp(&dst[1], vc, vc); +} + +void +test4 (__vector_quad *dst[], vector unsigned char vc) +{ + __builtin_mma_xxsetaccz(&dst[1][2]); + __builtin_mma_xvf32gerpp(&dst[1][2], vc, vc); +}