[gcc(refs/vendors/ibm/heads/mmaplus2)] Make the MMA instructions support -mdense-math.

Michael Meissner via Gcc-cvs Tue, 03 Mar 2026 19:52:25 -0800

https://gcc.gnu.org/g:9f84f3f254e11735e2b34ef6cd61aa1ba6b13dbd


commit 9f84f3f254e11735e2b34ef6cd61aa1ba6b13dbd
Author: Michael Meissner <[email protected]>
Date:   Tue Mar 3 20:10:03 2026 -0500

    Make the MMA instructions support -mdense-math.
    
    This patch completes support for the dense math registers with 512-bit types.
    The MMA insns have been modified to use the 'wD' constraint and the
    accumulator_operand predicate.
    
    The insn (mma_xxsetaccz) that clears accumulators has been changed to be a
    normal unspec when -mdense-math is in effect.  If -mno-dense-math is in
    effect, the insn remains an unspec_volatile due to register constraints and
    the need to issue a de-prime operation.
    
    I added a comment in front of each insn to say which instructions are
    generated by the insns.
    
    I set -mcpu=future to turn on -mdense-math.
    
    I added 2 tests to the testsuite for -mdense-math support.
    
    A future patch will add support for 1,024-bit dense registers.
    
    The patches have been tested on both little and big endian systems.  Can I
    check it into the master branch?
    
    This is version 4 of the patches.  The previous patches were:
    
     * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707452.html
     * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707453.html
     * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707454.html
     * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707455.html
     * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707456.html
    
    gcc/
    
    2026-03-03   Michael Meissner  <[email protected]>
    
            * config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
            (mma_xxsetaccz): Convert to being a define_expand that can handle
            both the original MMA support without dense math registers, and
            support with dense math registers.
            (*mma_xxsetaccz_nodm): Rename original mma_xxsetaccz, and restrict
            this to when we do not have dense math registers.
            (mma_xxsetaccz_dm): New insn for clearing dense math registers.
            (mma_<acc>): Restrict to when dense math registers are not
            available.
            (mma_<vv>): Add support for dense math registers.  Document which
            instructions are generated by each insn.
            (mma_<avv>): Likewise.
            (mma_<pv>): Likewise.
            (mma_<apv>): Likewise.
            (mma_<vvi4i4i8>): Likewise.
            (mma_<avvi4i4i8>): Likewise.
            (mma_<vvi4i4i2>): Likewise.
            (mma_<avvi4i4i2>): Likewise.
            (mma_<vvi4i4>): Likewise.
            (mma_<avvi4i4>): Likewise.
            (mma_<pvi4i2>): Likewise.
            (mma_<apvi4i2>): Likewise.
            (mma_<vvi4i4i4>): Likewise.
            (mma_<avvi4i4i4>): Likewise.
            * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin):
            Do not issue a xxmfacc instruction if we support dense math
            registers.
            * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): If
            -mcpu=future, turn on -mdense-math.
            (POWERPC_MASKS): Mark -mdense-math as being set by -mcpu=<xxx>
            options.
    
    gcc/testsuite/
    
    2026-03-03   Michael Meissner  <[email protected]>
    
            * gcc.target/powerpc/mma-dm-1.c: New test.
            * gcc.target/powerpc/mma-dm-2.c: Likewise.
            * lib/target-supports.exp
            (check_effective_target_powerpc_dense_math_ok): New powerpc target
            support.

Diff:
---
 gcc/config/rs6000/mma.md                    | 128 ++++++++++++++++++++++------
 gcc/config/rs6000/rs6000-builtin.cc         |   5 +-
 gcc/config/rs6000/rs6000-cpus.def           |   2 +
 gcc/testsuite/gcc.target/powerpc/mma-dm-1.c |  67 +++++++++++++++
 gcc/testsuite/gcc.target/powerpc/mma-dm-2.c |  67 +++++++++++++++
 gcc/testsuite/lib/target-supports.exp       |  19 +++++
 6 files changed, 258 insertions(+), 30 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 1813adbecd31..f8793a4e72b3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -90,6 +90,7 @@
    UNSPEC_MMA_XVI8GER4SPP
    UNSPEC_MMA_XXMFACC
    UNSPEC_MMA_XXMTACC
+   UNSPEC_MMA_DMSETDMRZ
   ])
 
 (define_c_enum "unspecv"
@@ -487,31 +488,68 @@
   DONE;
 })
 
-;; MMA instructions that do not use their accumulators as an input, still
-;; must not allow their vector operands to overlap the registers used by
-;; the accumulator.  We enforce this by marking the output as early clobber.
+;; If dense math registers are not available, MMA instructions that do
+;; not use their accumulators that overlap with FPR registers as an
+;; input, still must not allow their vector operands to overlap the
+;; registers used by the accumulator.  We enforce this by marking the
+;; output as early clobber.  The prime and de-prime instructions are
+;; not needed on systems with dense math registers.
 
 (define_insn "mma_<acc>"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
        (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
                    MMA_ACC))]
-  "TARGET_MMA"
+  "TARGET_MMA && !TARGET_DENSE_MATH"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 ;; We can't have integer constants in XOmode so we wrap this in an
-;; UNSPEC_VOLATILE.
+;; UNSPEC_VOLATILE.  If we have dense math registers, we can just use a normal
+;; UNSPEC instead of UNSPEC_VOLATILE.
 
-(define_insn "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+(define_expand "mma_xxsetaccz"
+  [(set (match_operand:XO 0 "accumulator_operand")
        (unspec_volatile:XO [(const_int 0)]
                            UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
+{
+  if (TARGET_DENSE_MATH)
+    {
+      emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
+      DONE;
+    }
+})
+
+;; Clear accumulator without dense math registers
+(define_insn "*mma_xxsetaccz_nodm"
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+       (unspec_volatile:XO [(const_int 0)]
+                           UNSPECV_MMA_XXSETACCZ))]
+  "TARGET_MMA && !TARGET_DENSE_MATH"
   "xxsetaccz %A0"
   [(set_attr "type" "mma")])
 
+;; Clear accumulator when dense math registers are available.
+(define_insn "mma_xxsetaccz_dm"
+  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
+       (unspec [(const_int 0)]
+               UNSPEC_MMA_DMSETDMRZ))]
+  "TARGET_DENSE_MATH"
+  "dmsetdmrz %A0"
+  [(set_attr "type" "mma")])
+
+
+;; MMA operations below.  If dense math registers are available, these
+;; operations will use the 8 accumulators which are separate registers.
+;; If dense math registers are not available, these operations will use
+;; accumulators that are overlaid on top of the FPR registers.
+
+;; Instructions:
+;; xvi4ger8   xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2
+;; xvbf16ger2 xvf32ger
+
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_VV))]
@@ -519,9 +557,15 @@
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
+;; Instructions:
+;; xvi4ger8pp   xvi8ger4pp  xvi8ger4spp   xvi16ger2pp xvi16ger2spp
+;; xvf16ger2pp  xvf16ger2pn  xvf16ger2np  xvf16ger2nn xvbf16ger2pp
+;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp  xvf32gerpn
+;; xvf32gernp   xvf32gernn
+
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_AVV))]
@@ -529,8 +573,10 @@
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
+;; Instruction: xvf64ger
+
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_PV))]
@@ -538,9 +584,11 @@
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
+;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn
+
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_APV))]
@@ -548,8 +596,10 @@
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
+;; Instruction: pmxvi4ger8
+
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -561,9 +611,11 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instruction: pmxvi4ger8pp
+
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -575,8 +627,11 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instructions:
+;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2
+
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -588,9 +643,14 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instructions:
+;; pmxvi16ger2pp  pmxvi16ger2spp pmxvf16ger2pp  pmxvf16ger2pn
+;; pmxvf16ger2np  pmxvf16ger2nn  pmxvbf16ger2pp pmxvbf16ger2pn
+;; pmxvbf16ger2np pmxvbf16ger2nn
+
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -602,8 +662,10 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instruction: pmxvf32ger
+
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -614,9 +676,11 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn
+
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -627,8 +691,10 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instruction: pmxvf64ger
+
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -639,9 +705,11 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn
+
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -652,8 +720,10 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instruction: pmxvi8ger4
+
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -665,9 +735,11 @@
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
+;; Instructions: pmxvi8ger4pp pmxvi8ger4spp
+
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+       (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 45c88fe063b1..084eaab5b96a 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1125,8 +1125,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
        }
 
       /* If we're disassembling an accumulator into a different type, we need
-        to emit a xxmfacc instruction now, since we cannot do it later.  */
-      if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
+        to emit a xxmfacc instruction now, since we cannot do it later.  If we
+        have dense math registers, we don't need to do this.  */
+      if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH)
        {
          new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
          new_call = gimple_build_call (new_decl, 1, src);
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index dc67e287672e..3e51848481f4 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -91,6 +91,7 @@
    will be fixed in potential future machines.  */
 #define FUTURE_MASKS_SERVER    (POWER11_MASKS_SERVER                   \
                                 | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR    \
+                                | OPTION_MASK_DENSE_MATH               \
                                 | OPTION_MASK_FUTURE)
 
 /* Flags that need to be turned off if -mno-vsx.  */
@@ -124,6 +125,7 @@
                                 | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR    \
                                 | OPTION_MASK_CMPB                     \
                                 | OPTION_MASK_CRYPTO                   \
+                                | OPTION_MASK_DENSE_MATH               \
                                 | OPTION_MASK_DFP                      \
                                 | OPTION_MASK_DLMZB                    \
                                 | OPTION_MASK_EFFICIENT_UNALIGNED_VSX  \
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-dm-1.c b/gcc/testsuite/gcc.target/powerpc/mma-dm-1.c
new file mode 100644
index 000000000000..deea87a0aa31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-dm-1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test basic dense math support for MMA.  */
+
+void
+move_simple (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, 2 stxvp.   */
+  __vector_quad c = *a;
+  *b = c;
+}
+
+void
+move_constraint_d (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, 2 stxvp.   */
+  __vector_quad c = *a;
+  __asm__ (" # %x0 (d constraint)" : "+d" (c));
+  *b = c;
+}
+
+void
+move_constraint_wD (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, dmxxinstdmr512, dmxxextfdmr512, 2 stxvp.   */
+  __vector_quad c = *a;
+  __asm__ (" # %A0 (wD constraint)" : "+wD" (c));
+  *b = c;
+}
+
+void
+clear_simple (__vector_quad *a)
+{
+  /* dmsetdmrz, dmxxextfdmr512, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (a);
+}
+
+void
+clear_constraint_d (__vector_quad *a)
+{
+  __vector_quad z;
+
+  /* dmsetdmrz, dmxxextfdmr512, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (&z);
+  __asm__ (" # %x0 (d constraint)" : "+d" (z));
+  *a = z;
+}
+
+void
+clear_constraint_wD (__vector_quad *a)
+{
+  __vector_quad z;
+
+  /* dmsetdmrz, dmxxextfdmr512, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (&z);
+  __asm__ (" # %A0 (d constraint)" : "+wD" (z));
+  *a = z;
+}
+
+/* { dg-final { scan-assembler-times {\mdmsetdmrz\M}       3 } } */
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M}  4 } } */
+/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M}  1 } } */
+/* { dg-final { scan-assembler-not   {\mxxmfacc\M}           } } */
+/* { dg-final { scan-assembler-not   {\mxxmtacc\M}           } } */
+/* { dg-final { scan-assembler-not   {\mxxsetaccz\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-dm-2.c b/gcc/testsuite/gcc.target/powerpc/mma-dm-2.c
new file mode 100644
index 000000000000..091b71c94f71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-dm-2.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mno-dense-math -O2" } */
+
+/* Test basic MMA support when dense math is disabled (-mno-dense-math).  */
+
+void
+move_simple (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp.   */
+  __vector_quad c = *a;
+  *b = c;
+}
+
+void
+move_constraint_d (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp.   */
+  __vector_quad c = *a;
+  __asm__ (" # %x0 (d constraint)" : "+d" (c));
+  *b = c;
+}
+
+void
+move_constraint_wD (__vector_quad *a, __vector_quad *b)
+{
+  /* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp.   */
+  __vector_quad c = *a;
+  __asm__ (" # %A0 (wD constraint)" : "+wD" (c));
+  *b = c;
+}
+
+void
+clear_simple (__vector_quad *a)
+{
+  /* xxsetaccz, xxmfacc, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (a);
+}
+
+void
+clear_constraint_d (__vector_quad *a)
+{
+  __vector_quad z;
+
+  /* xxsetaccz, xxmfacc, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (&z);
+  __asm__ (" # %x0 (d constraint)" : "+d" (z));
+  *a = z;
+}
+
+void
+clear_constraint_wD (__vector_quad *a)
+{
+  __vector_quad z;
+
+  /* xxsetaccz, xxmfacc, 2 stxvp.  */
+  __builtin_mma_xxsetaccz (&z);
+  __asm__ (" # %A0 (d constraint)" : "+wD" (z));
+  *a = z;
+}
+
+/* { dg-final { scan-assembler-not   {\mdmsetdmrz\M}        } } */
+/* { dg-final { scan-assembler-not   {\mdmxxextfdmr512\M}   } } */
+/* { dg-final { scan-assembler-not   {\mdmxxinstdmr512\M}   } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M}        6 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M}        3 } } */
+/* { dg-final { scan-assembler-times {\mxxsetaccz\M}      3 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index fade04379342..59c7a3f22b49 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7990,6 +7990,25 @@ proc check_effective_target_power10_ok { } {
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+    if { ([istarget powerpc*-*-*]) } {
+       return [check_no_compiler_messages powerpc_dense_math_ok object {
+           __vector_quad vq;
+           int main (void) {
+               /* Make sure we have dense math support.  */
+                 __vector_quad dmr;
+                 __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+                 vq = dmr;
+               return 0;
+           }
+       } "-mcpu=future"]
+    } else {
+       return 0;
+    }
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.

[gcc(refs/vendors/ibm/heads/mmaplus2)] Make the MMA instructions support -mdense-math.

Reply via email to