PR102976 shows a test case where we generate wrong code when building
a vector pair from two vector registers.  The bug is that, with unlucky
register assignments, the output operand can overlap an input operand, so
we clobber that input before both registers of the output have been
written.  The solution is to mark the output operands of the assemble pair
and accumulator patterns as early-clobber.

This passed bootstrap and regtesting with no regressions and our
OpenBLAS team has confirmed it fixes the issues they reported.
Ok for mainline?

Ok for GCC 11 too after a few days on trunk?

Peter


gcc/
        PR target/102976
        * config/rs6000/mma.md (*vsx_assemble_pair): Add early-clobber for
        output operand.
        (*mma_assemble_acc): Likewise.

gcc/testsuite/
        PR target/102976
        * gcc.target/powerpc/pr102976.c: New test.

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 1990a2183f6..f0ea99963f7 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -339,7 +339,7 @@ (define_expand "vsx_assemble_pair"
 })
 
 (define_insn_and_split "*vsx_assemble_pair"
-  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+  [(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
        (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
                    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
                    UNSPEC_MMA_ASSEMBLE))]
@@ -405,7 +405,7 @@ (define_expand "mma_assemble_acc"
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
        (unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
                    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
                    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr102976.c b/gcc/testsuite/gcc.target/powerpc/pr102976.c
new file mode 100644
index 00000000000..a8de8f056f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr102976.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#include <altivec.h>
+void
+bug (__vector_pair *dst)
+{
+  register vector unsigned char vec0 asm ("vs44");
+  register vector unsigned char vec1 asm ("vs32");
+  __builtin_vsx_build_pair (dst, vec0, vec1);
+}
+
+/* { dg-final { scan-assembler-times {xxlor[^,]*,44,44} 1 } } */
+/* { dg-final { scan-assembler-times {xxlor[^,]*,32,32} 1 } } */

Reply via email to