This patch addresses an issue when compiling the MMA optimized DGEMM kernel
in OpenBLAS.  The MMA code uses all 8 accumulators, which overlap all of the
vs0-vs31 vector registers.  Current trunk assigns one of the normal vector
inputs of one of the MMA instructions to a vs0-vs31 register, which forces us
to spill one of the accumulators to memory, leading to poor performance.  The
solution here is to replace the "wa" constraints for the vector input operands
in the MMA instruction patterns with "v,?d" so that we disparage using
vs0-vs31 and prefer using the Altivec registers vs32-vs63 instead, which fixes
the dgemm performance issue.

This passed bootstrap and regtesting with no regressions on powerpc64le-linux.
Ok for trunk and, after a few days of burn-in, for the GCC12 release branch?

Technically, the same issue exists in GCC11 and GCC10, but the register
allocator (RA) assignment is OK with the current code, so unless/until we
have a test case that exhibits the issue, I'm only asking for a backport to
GCC12, which does show the performance problem.

Peter


gcc/
        PR target/105556
        * config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>,
        mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>,
        mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>,
        mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraint with "v,?d".

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6d516..9c9920870e4 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -490,50 +490,50 @@ (define_insn "mma_xxsetaccz"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")]
                    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")]
                    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")]
                    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:OO 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:OO 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")]
                    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "u8bit_cint_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "u8bit_cint_operand" "n,n,n,n")]
                    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
@@ -541,13 +541,13 @@ (define_insn "mma_<vvi4i4i8>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "u8bit_cint_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 6 "u8bit_cint_operand" "n,n,n,n")]
                    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
@@ -555,12 +555,12 @@ (define_insn "mma_<avvi4i4i8>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_3_operand" "n,n,n,n")]
                    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
@@ -568,13 +568,13 @@ (define_insn "mma_<vvi4i4i2>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 6 "const_0_to_3_operand" "n,n,n,n")]
                    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
@@ -582,11 +582,11 @@ (define_insn "mma_<avvi4i4i2>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")]
                    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
@@ -594,12 +594,12 @@ (define_insn "mma_<vvi4i4>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n,n,n")]
                    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
@@ -607,11 +607,11 @@ (define_insn "mma_<avvi4i4>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 4 "const_0_to_3_operand" "n,n,n,n")]
                    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
@@ -619,12 +619,12 @@ (define_insn "mma_<pvi4i2>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:OO 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:OO 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_3_operand" "n,n,n,n")]
                    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
@@ -632,12 +632,12 @@ (define_insn "mma_<apvi4i2>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n,n,n")]
                    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
@@ -645,13 +645,13 @@ (define_insn "mma_<vvi4i4i4>"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d,&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0,0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,v,?d,?d")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?d,v,?d")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n,n,n")
+                   (match_operand:SI 6 "const_0_to_15_operand" "n,n,n,n")]
                    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"

Reply via email to