See the previous post for a longer explanation of the motivations for this
patch:

    https://gcc.gnu.org/pipermail/gcc-patches/2024-October/664694.html

This patch adds a new include file (vector-pair.h) that implements a set of
functions that allow people writing high performance libraries to optimize
their code to use the vector pair load/store instructions on power10
computers to improve memory bandwidth.

I have tested this on both big endian and little endian servers.  Can I check
this into the GCC trunk?

2024-10-07  Michael Meissner  <meiss...@linux.ibm.com>

gcc/

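        * config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers.
        * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
        __VPAIR__ when the MMA instructions are available.
        * config/rs6000/vector-pair.h: New file.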
        * doc/extend.texi (PowerPC Vector Pair Support): Document the vector
        pair support functions.

gcc/testsuite/

        * gcc.target/powerpc/vpair-1.c: New test or include file.
        * gcc.target/powerpc/vpair-2.c: Likewise.
        * gcc.target/powerpc/vpair-3-not-p10.c: Likewise.
        * gcc.target/powerpc/vpair-3-p10.c: Likewise.
        * gcc.target/powerpc/vpair-3.h: Likewise.
        * gcc.target/powerpc/vpair-4-not-p10.c: Likewise.
        * gcc.target/powerpc/vpair-4-p10.c: Likewise.
        * gcc.target/powerpc/vpair-4.h: Likewise.
---
 gcc/config.gcc                                |   2 +-
 gcc/config/rs6000/rs6000-c.cc                 |   8 +-
 gcc/config/rs6000/vector-pair.h               | 519 ++++++++++++++++++
 gcc/doc/extend.texi                           |  98 ++++
 gcc/testsuite/gcc.target/powerpc/vpair-1.c    | 141 +++++
 gcc/testsuite/gcc.target/powerpc/vpair-2.c    | 141 +++++
 .../gcc.target/powerpc/vpair-3-not-p10.c      |  15 +
 .../gcc.target/powerpc/vpair-3-p10.c          |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-3.h    | 435 +++++++++++++++
 .../gcc.target/powerpc/vpair-4-not-p10.c      |  15 +
 .../gcc.target/powerpc/vpair-4-p10.c          |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-4.h    | 435 +++++++++++++++
 12 files changed, 1834 insertions(+), 3 deletions(-)
 create mode 100644 gcc/config/rs6000/vector-pair.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-3.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vpair-4.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0b794e977f6..3627bed8b86 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -537,7 +537,7 @@ powerpc*-*-*)
        extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h"
        extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h"
        extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
-       extra_headers="${extra_headers} amo.h"
+       extra_headers="${extra_headers} amo.h vector-pair.h"
        case x$with_cpu in
            
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
                cpu_is_64bit=yes
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 82826f96a8e..77bee8fc878 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -590,9 +590,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
   if (rs6000_cpu == PROCESSOR_CELL)
     rs6000_define_or_undefine_macro (define_p, "__PPU__");
 
-  /* Tell the user if we support the MMA instructions.  */
+  /* Tell the user if we support the MMA instructions.  Also tell vector-pair.h
+     that we have the vector pair built-in function support.  */
   if ((flags & OPTION_MASK_MMA) != 0)
-    rs6000_define_or_undefine_macro (define_p, "__MMA__");
+    {
+      rs6000_define_or_undefine_macro (define_p, "__MMA__");
+      rs6000_define_or_undefine_macro (define_p, "__VPAIR__");
+    }
   /* Whether pc-relative code is being generated.  */
   if ((flags & OPTION_MASK_PCREL) != 0)
     rs6000_define_or_undefine_macro (define_p, "__PCREL__");
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
new file mode 100644
index 00000000000..ceb28c4e974
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.h
@@ -0,0 +1,519 @@
+/* PowerPC vector pair include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   Contributed by Aldy Hernandez (al...@redhat.com).
+   Rewritten by Paolo Bonzini (bonz...@gnu.org).
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Provide support for vector pairs, even on systems that do not have native
+   support for loading and storing pairs of vectors.  */
+
+#ifndef _VECTOR_PAIR_H
+#define _VECTOR_PAIR_H 1
+
+/* Union of the various vector pair types.  */
+union __vpair_union {
+
+#ifdef __MMA__
+  __vector_pair                __vpair;
+#endif
+
+  vector double                __vp_f64[2];
+  vector float         __vp_f32[2];
+  vector unsigned char __vp_uc[2];
+};
+
+typedef union __vpair_union    vector_pair_f64_t;
+typedef union __vpair_union    vector_pair_f32_t;
+
+#if !__VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__
+#if __MMA__
+#define __VPAIR_ASM__          1
+
+#else
+#define __VPAIR_NOP10__                1
+#endif
+#endif
+
+/* Macros to simplify creation of the various operations.
+ *
+ * The __VPAIR_FP_{UNARY,BINARY,FMA} macros are the base macros, and take:
+ *     R:         The  argument for the output vector pair
+ *     A, B, C:   1-3 arguments for the inputs
+ *     OPCODE:    The assembler opcode for __asm__ on power10
+ *     VEC:       Either __vp_f64 or __vp_f32 for the union field
+ *     VEC_FUNC:  128-bit vector function for use on power8/power9
+ *
+ * The __VPAIR_FP_splat macro takes:
+ *     R:         The  argument for the output vector pair
+ *     X:         The scalar that is to be splat-ed to the vector pair
+ *     VEC:       Either __vp_f64 or __vp_f32 for the union field
+ *
+ * The __VPAIR_F32_<...> and __VPAIR_F64_<...> macros call the above macros
+ * with the appropriate structure field to use.
+ */
+
+#undef  __VPAIR_FP_SPLAT
+#undef  __VPAIR_FP_UNARY
+#undef  __VPAIR_FP_BINARY
+#undef  __VPAIR_FP_FMA
+
+#undef  __VPAIR_F64_UNARY
+#undef  __VPAIR_F64_BINARY
+#undef  __VPAIR_F64_FMA
+
+#undef  __VPAIR_F32_UNARY
+#undef  __VPAIR_F32_BINARY
+#undef  __VPAIR_F32_FMA
+
+/* Operations using a vector pair and __asm__ operations.  */
+#if __MMA__ && !__VPAIR_NOP10__
+
+/* When using __asm__, we need to access the second register.  Due to the way
+   VSX registers were formed by combining the traditional floating point
+   registers and Altivec registers, we can't use the output modifier %L<n> to
+   refer to the second register if the VSX register was a traditional Altivec
+   register.  If the value is in VSX registers 34 & 35, %x0 would give 34, but
+   %L0 would give 1, since 'Altivec' registers start at 0.
+
+   If we are using GAS under Linux, we can use %x0+1 to access the second
+   register and use the full VSX register set.
+
+   If this include file is used on non-Linux systems, or with a non-GCC
+   compiler, limit the registers used to the traditional FPR registers so that
+   we can use %L0.  */
+
+#if __VPAIR__USE_FPR__ || !__GNUC__ || (!__linux__ && !__ELF__)
+
+/* Use %0 and %L0 on traditional FPR registers.  */
+#define __VPAIR_FP_SPLAT(R, X, VEC)                                    \
+  __asm__ ("xxlor %L0,%0,%0"                                           \
+           : "=d" ((R)->__vpair)                                       \
+           : "0" (__builtin_vec_splats ((X))))
+
+#define __VPAIR_FP_UNARY(R, A, OPCODE, VEC, VEC_FUNC)                  \
+  __asm__ (OPCODE " %0,%1\n\t" OPCODE " %L0,%L1"                       \
+           : "=d" ((R)->__vpair)                                       \
+           : "d" ((A)->__vpair))
+
+#define __VPAIR_FP_BINARY(R, A, B, OPCODE, VEC, VEC_FUNC)              \
+  __asm__ (OPCODE " %0,%1,%2\n\t" OPCODE " %L0,%L1,%L2"                        \
+           : "=d" ((R)->__vpair)       /* %0/%L0: result pair.  */     \
+           : "d" ((A)->__vpair), "d" ((B)->__vpair)) /* %1, %2: inputs.  */
+
+/* Note the 'a' form of the fma instructions must be used.  */
+#define __VPAIR_FP_FMA(R, A, B, C, OPCODE, VEC, VEC_FUNC)              \
+  __asm__ (OPCODE " %0,%1,%2\n\t" OPCODE " %L0,%L1,%L2"                        
\
+           : "=d" ((R)->__vpair)                                       \
+           : "d" ((A)->__vpair), "d" ((B)->__vpair), "0" ((C)->__vpair))
+
+#else
+
+/* Use %x0 and %x0+1 on VSX registers.  */
+#define __VPAIR_FP_SPLAT(R, X, VEC)                                    \
+  __asm__ ("xxlor %x0+1,%x0,%x0"                                       \
+           : "=wa" ((R)->__vpair)                                      \
+           : "0" (__builtin_vec_splats ((X))))
+
+#define __VPAIR_FP_UNARY(R, A, OPCODE, VEC, VEC_FUNC)                  \
+  __asm__ (OPCODE " %x0,%x1\n\t" OPCODE " %x0+1,%x1+1"                 \
+           : "=wa" ((R)->__vpair)                                      \
+           : "wa" ((A)->__vpair))
+
+#define __VPAIR_FP_BINARY(R, A, B, OPCODE, VEC, VEC_FUNC)              \
+  __asm__ (OPCODE " %x0,%x1,%x2\n\t" OPCODE " %x0+1,%x1+1,%x2+1"       \
+           : "=wa" ((R)->__vpair)                                      \
+           : "wa" ((A)->__vpair), "wa" ((B)->__vpair))
+
+/* Note the 'a' form of the fma instructions must be used.  */
+#define __VPAIR_FP_FMA(R, A, B, C, OPCODE, VEC, VEC_FUNC)              \
+  __asm__ (OPCODE " %x0,%x1,%x2\n\t" OPCODE " %x0+1,%x1+1,%x2+1"       \
+           : "=wa" ((R)->__vpair)                                      \
+           : "wa" ((A)->__vpair), "wa" ((B)->__vpair), "0" ((C)->__vpair))
+#endif /* Select whether to use %0/%L0 or %x0/%x0+1.  */
+
+#else  /* vpair support on power8/power9.  */
+
+/* Pair of vector operations using a built-in function.  */
+
+#define __VPAIR_FP_SPLAT(R, X, VEC)                                    \
+  (R)->VEC[0] = (R)->VEC[1] = __builtin_vec_splats ((X))
+
+#define __VPAIR_FP_UNARY(R, A, OPCODE, VEC, VEC_FUNC)                  \
+  do                                                                   \
+    {                                                                  \
+      (R)->VEC[0] = VEC_FUNC ((A)->VEC[0]);                            \
+      (R)->VEC[1] = VEC_FUNC ((A)->VEC[1]);                            \
+    }                                                                  \
+  while (0)
+
+#define __VPAIR_FP_BINARY(R, A, B, OPCODE, VEC, VEC_FUNC)              \
+  do                                                                   \
+    {                                                                  \
+      (R)->VEC[0] = VEC_FUNC ((A)->VEC[0], (B)->VEC[0]);               \
+      (R)->VEC[1] = VEC_FUNC ((A)->VEC[1], (B)->VEC[1]);               \
+    }                                                                  \
+  while (0)
+
+#define __VPAIR_FP_FMA(R, A, B, C, OPCODE, VEC, VEC_FUNC)              \
+  do                                                                   \
+    {                                                                  \
+      (R)->VEC[0] = VEC_FUNC ((A)->VEC[0], (B)->VEC[0], (C)->VEC[0]);  \
+      (R)->VEC[1] = VEC_FUNC ((A)->VEC[1], (B)->VEC[1], (C)->VEC[1]);  \
+    }                                                                  \
+  while (0)
+
+#endif
+
+/* 64-bit version of the macros.  */
+#define __VPAIR_F64_UNARY(R, A, OPCODE, VEC_FUNC)                      \
+  __VPAIR_FP_UNARY(R, A, OPCODE, __vp_f64, VEC_FUNC)
+
+#define __VPAIR_F64_BINARY(R, A, B, OPCODE, VEC_FUNC)                  \
+  __VPAIR_FP_BINARY(R, A, B, OPCODE, __vp_f64, VEC_FUNC)
+
+#define __VPAIR_F64_FMA(R, A, B, C, OPCODE, VEC_FUNC)                  \
+  __VPAIR_FP_FMA(R, A, B, C, OPCODE, __vp_f64, VEC_FUNC)
+
+
+/* 32-bit version of the macros.  */
+#define __VPAIR_F32_UNARY(R, A, OPCODE, VEC_FUNC)                      \
+  __VPAIR_FP_UNARY(R, A, OPCODE, __vp_f32, VEC_FUNC)
+
+#define __VPAIR_F32_BINARY(R, A, B, OPCODE, VEC_FUNC)                  \
+  __VPAIR_FP_BINARY(R, A, B, OPCODE, __vp_f32, VEC_FUNC)
+
+#define __VPAIR_F32_FMA(R, A, B, C, OPCODE, VEC_FUNC)                  \
+  __VPAIR_FP_FMA(R, A, B, C, OPCODE, __vp_f32, VEC_FUNC)
+
+
+/* Splat functions.  */
+
+/* 64-bit splat to vector pair.  */
+
+static inline void
+vpair_f64_splat (vector_pair_f64_t *__r, double __x)
+{
+  __VPAIR_FP_SPLAT (__r, __x, __vp_f64);
+}
+
+/* 32-bit splat to vector pair.  */
+
+static inline void
+vpair_f32_splat (vector_pair_f32_t *__r, float __x)
+{
+  __VPAIR_FP_SPLAT (__r, __x, __vp_f32);
+}
+
+
+/* 64-bit unary functions.  */
+
+static inline void
+vpair_f64_abs (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a)
+{
+  __VPAIR_F64_UNARY (__r, __a,
+                    "xvabsdp",
+                    __builtin_vec_abs);
+}
+
+static inline void
+vpair_f64_nabs (vector_pair_f64_t       *__r,
+               const vector_pair_f64_t *__a)
+{
+  __VPAIR_F64_UNARY (__r, __a,
+                    "xvnabsdp",
+                    __builtin_vec_nabs);
+}
+
+static inline void
+vpair_f64_neg (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a)
+{
+  __VPAIR_F64_UNARY (__r, __a,
+                    "xvnegdp",
+                    __builtin_vec_neg);
+}
+
+static inline void
+vpair_f64_sqrt (vector_pair_f64_t       *__r,
+               const vector_pair_f64_t *__a)
+{
+  __VPAIR_F64_UNARY (__r, __a,
+                    "xvsqrtdp",
+                    __builtin_vec_sqrt);
+}
+
+/* 32-bit unary functions.  */
+
+static inline void
+vpair_f32_abs (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a)
+{
+  __VPAIR_F32_UNARY (__r, __a,
+                    "xvabssp",
+                    __builtin_vec_abs);
+}
+
+static inline void
+vpair_f32_nabs (vector_pair_f32_t       *__r,
+               const vector_pair_f32_t *__a)
+{
+  __VPAIR_F32_UNARY (__r, __a,
+                    "xvnabssp",
+                    __builtin_vec_nabs);
+}
+
+static inline void
+vpair_f32_neg (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a)
+{
+  __VPAIR_F32_UNARY (__r, __a,
+                    "xvnegsp",
+                    __builtin_vec_neg);
+}
+
+static inline void
+vpair_f32_sqrt (vector_pair_f32_t       *__r,
+               const vector_pair_f32_t *__a)
+{
+  __VPAIR_F32_UNARY (__r, __a,
+                    "xvsqrtsp",
+                    __builtin_vec_sqrt);
+}
+
+
+/* 64-bit binary functions.  */
+
+static inline void
+vpair_f64_add (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvadddp",
+                     __builtin_vec_add);
+}
+
+static inline void
+vpair_f64_div (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvdivdp",
+                     __builtin_vec_div);
+}
+
+static inline void
+vpair_f64_max (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvmaxdp",
+                     __builtin_vec_max);
+}
+
+static inline void
+vpair_f64_min (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvmindp",
+                     __builtin_vec_min);
+}
+
+static inline void
+vpair_f64_mul (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvmuldp",
+                     __builtin_vec_mul);
+}
+
+static inline void
+vpair_f64_sub (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b)
+{
+  __VPAIR_F64_BINARY (__r, __a, __b,
+                     "xvsubdp",
+                     __builtin_vec_sub);
+}
+
+/* 32-bit binary functions.  */
+
+static inline void
+vpair_f32_add (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvaddsp",
+                     __builtin_vec_add);
+}
+
+static inline void
+vpair_f32_div (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvdivsp",
+                     __builtin_vec_div);
+}
+
+static inline void
+vpair_f32_max (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvmaxsp",
+                     __builtin_vec_max);
+}
+
+static inline void
+vpair_f32_min (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvminsp",
+                     __builtin_vec_min);
+}
+
+static inline void
+vpair_f32_mul (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvmulsp",
+                     __builtin_vec_mul);
+}
+
+static inline void
+vpair_f32_sub (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b)
+{
+  __VPAIR_F32_BINARY (__r, __a, __b,
+                     "xvsubsp",
+                     __builtin_vec_sub);
+}
+
+/* 64-bit fma operations.  */
+
+static inline void
+vpair_f64_fma (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b,
+              const vector_pair_f64_t *__c)
+{
+  __VPAIR_F64_FMA (__r, __a, __b, __c,
+                  "xvmaddadp",
+                  __builtin_vsx_xvmadddp);
+}
+
+static inline void
+vpair_f64_fms (vector_pair_f64_t       *__r,
+              const vector_pair_f64_t *__a,
+              const vector_pair_f64_t *__b,
+              const vector_pair_f64_t *__c)
+{
+  __VPAIR_F64_FMA (__r, __a, __b, __c,
+                  "xvmsubadp",
+                  __builtin_vsx_xvmsubdp);
+}
+
+static inline void
+vpair_f64_nfma (vector_pair_f64_t       *__r,
+               const vector_pair_f64_t *__a,
+               const vector_pair_f64_t *__b,
+               const vector_pair_f64_t *__c)
+{
+  __VPAIR_F64_FMA (__r, __a, __b, __c,
+                  "xvnmaddadp",
+                  __builtin_vsx_xvnmadddp);
+}
+
+static inline void
+vpair_f64_nfms (vector_pair_f64_t       *__r,
+               const vector_pair_f64_t *__a,
+               const vector_pair_f64_t *__b,
+               const vector_pair_f64_t *__c)
+{
+  __VPAIR_F64_FMA (__r, __a, __b, __c,
+                  "xvnmsubadp",
+                  __builtin_vsx_xvnmsubdp);
+}
+/* 32-bit fma operations.  */
+
+static inline void
+vpair_f32_fma (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b,
+              const vector_pair_f32_t *__c)
+{
+  __VPAIR_F32_FMA (__r, __a, __b, __c,
+                  "xvmaddasp",
+                  __builtin_vsx_xvmaddsp);
+}
+
+static inline void
+vpair_f32_fms (vector_pair_f32_t       *__r,
+              const vector_pair_f32_t *__a,
+              const vector_pair_f32_t *__b,
+              const vector_pair_f32_t *__c)
+{
+  __VPAIR_F32_FMA (__r, __a, __b, __c,
+                  "xvmsubasp",
+                  __builtin_vsx_xvmsubsp);
+}
+
+static inline void
+vpair_f32_nfma (vector_pair_f32_t       *__r,
+               const vector_pair_f32_t *__a,
+               const vector_pair_f32_t *__b,
+               const vector_pair_f32_t *__c)
+{
+  __VPAIR_F32_FMA (__r, __a, __b, __c,
+                  "xvnmaddasp",
+                  __builtin_vsx_xvnmaddsp);
+}
+
+static inline void
+vpair_f32_nfms (vector_pair_f32_t       *__r,
+               const vector_pair_f32_t *__a,
+               const vector_pair_f32_t *__b,
+               const vector_pair_f32_t *__c)
+{
+  __VPAIR_F32_FMA (__r, __a, __b, __c,
+                  "xvnmsubasp",
+                  __builtin_vsx_xvnmsubsp);
+}
+#endif /* _VECTOR_PAIR_H.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f46c3df3303..4c9e8c2e313 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16157,6 +16157,7 @@ instructions, but allow the compiler to schedule those 
calls.
 * PowerPC Hardware Transactional Memory Built-in Functions::
 * PowerPC Atomic Memory Operation Functions::
 * PowerPC Matrix-Multiply Assist Built-in Functions::
+* PowerPC Vector Pair Support::
 * PRU Built-in Functions::
 * RISC-V Built-in Functions::
 * RISC-V Vector Intrinsics::
@@ -24673,6 +24674,103 @@ __vector_pair __builtin_vsx_lxvp (size_t, 
__vector_pair *);
 void __builtin_vsx_stxvp (__vector_pair, size_t, __vector_pair *);
 @end smallexample
 
+@node PowerPC Vector Pair Support
+@subsection PowerPC Vector Pair Support
+ISA 3.1 (power10) added instructions to load and store pairs of
+vectors with a single instruction.
+
+GCC now provides an include file (@file{vector-pair.h}) on PowerPC
+systems that allows users to write 32-bit and 64-bit floating point
+code that processes data in 256-bit chunks rather than 128-bit
+chunks.
+
+If the code is compiled on an ISA 3.1 system with MMA enabled, the
+vector pair functions will use the @code{__vector_pair} type to have
+values in adjacent vectors and do the operation as a pair of
+operations.
+
+If the code is compiled on a VSX system, but not one with MMA enabled, the
+vector pair functions will use 2 separate vectors to do the operation.
+
+Two types are provided: @code{vector_pair_f64_t} is for vector pairs
+that will operate on units of 4 64-bit floating point values, and
+@code{vector_pair_f32_t} for operating on units of 8 32-bit floating
+point values.
+
+@node PowerPC Vector Pair Support for 64-bit floating point
+@subsection PowerPC Vector Pair Support for 64-bit floating point
+
+The following functions are provided for operating on vector pairs
+that consist of 4 64-bit floating point values:
+
+@smallexample
+void vpair_f64_splat (vector_pair_f64_t *, double);
+
+void vpair_f64_abs (vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_nabs (vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_neg (vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_sqrt (vector_pair_f64_t *, vector_pair_f64_t *);
+
+void vpair_f64_add (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+void vpair_f64_div (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+void vpair_f64_max (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+void vpair_f64_min (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+void vpair_f64_mul (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+void vpair_f64_sub (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *);
+
+void vpair_f64_fma (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_fms (vector_pair_f64_t *, vector_pair_f64_t *,
+                    vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_nfma (vector_pair_f64_t *, vector_pair_f64_t *,
+                     vector_pair_f64_t *, vector_pair_f64_t *);
+void vpair_f64_nfms (vector_pair_f64_t *, vector_pair_f64_t *,
+                     vector_pair_f64_t *, vector_pair_f64_t *);
+@end smallexample
+
+@node PowerPC Vector Pair Support for 32-bit floating point
+@subsection PowerPC Vector Pair Support for 32-bit floating point
+
+The following functions are provided for operating on vector pairs
+that consist of 8 32-bit floating point values:
+
+@smallexample
+void vpair_f32_splat (vector_pair_f32_t *, float);
+
+void vpair_f32_abs (vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_nabs (vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_neg (vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_sqrt (vector_pair_f32_t *, vector_pair_f32_t *);
+
+void vpair_f32_add (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+void vpair_f32_div (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+void vpair_f32_max (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+void vpair_f32_min (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+void vpair_f32_mul (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+void vpair_f32_sub (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *);
+
+void vpair_f32_fma (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_fms (vector_pair_f32_t *, vector_pair_f32_t *,
+                    vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_nfma (vector_pair_f32_t *, vector_pair_f32_t *,
+                     vector_pair_f32_t *, vector_pair_f32_t *);
+void vpair_f32_nfms (vector_pair_f32_t *, vector_pair_f32_t *,
+                     vector_pair_f32_t *, vector_pair_f32_t *);
+@end smallexample
+
 @node PRU Built-in Functions
 @subsection PRU Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-1.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
new file mode 100644
index 00000000000..55772cc44e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   vector pairs with 4 double elements.  */
+
+#include <vector-pair.h>
+
+void
+test_add (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvadddp, 1 stxvp.  */
+  vpair_f64_add (dest, x, y);
+}
+
+void
+test_sub (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvsubdp, 1 stxvp.  */
+  vpair_f64_sub (dest, x, y);
+}
+
+void
+test_multiply (vector_pair_f64_t *dest,
+              vector_pair_f64_t *x,
+              vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmuldp, 1 stxvp.  */
+  vpair_f64_mul (dest, x, y);
+}
+
+void
+test_min (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmindp, 1 stxvp.  */
+  vpair_f64_min (dest, x, y);
+}
+
+void
+test_max (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y)
+{
+  /* 2 lxvp, 2 xvmaxdp, 1 stxvp.  */
+  vpair_f64_max (dest, x, y);
+}
+
+void
+test_negate (vector_pair_f64_t *dest,
+            vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvnegdp, 1 stxvp.  */
+  vpair_f64_neg (dest, x);
+}
+
+void
+test_abs (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvabsdp, 1 stxvp.  */
+  vpair_f64_abs (dest, x);
+}
+
+void
+test_negative_abs (vector_pair_f64_t *dest,
+                  vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvnabsdp, 1 stxvp.  */
+  vpair_f64_nabs (dest, x);
+}
+
+void
+test_sqrt (vector_pair_f64_t *dest,
+          vector_pair_f64_t *x)
+{
+  /* 1 lxvp, 2 xvsqrtdp, 1 stxvp.  */
+  vpair_f64_sqrt (dest, x);
+}
+
+void
+test_fma (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y,
+         vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvmadd{a,m}dp, 1 stxvp.  */
+  vpair_f64_fma (dest, x, y, z);
+}
+
+void
+test_fms (vector_pair_f64_t *dest,
+         vector_pair_f64_t *x,
+         vector_pair_f64_t *y,
+         vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvmsub{a,m}dp, 1 stxvp.  */
+  vpair_f64_fms (dest, x, y, z);
+}
+
+void
+test_nfma (vector_pair_f64_t *dest,
+          vector_pair_f64_t *x,
+          vector_pair_f64_t *y,
+          vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvnmadd{a,m}dp, 1 stxvp.  */
+  vpair_f64_nfma (dest, x, y, z);
+}
+
+void
+test_nfms (vector_pair_f64_t *dest,
+          vector_pair_f64_t *x,
+          vector_pair_f64_t *y,
+          vector_pair_f64_t *z)
+{
+  /* 3 lxvp, 2 xvnmsub{a,m}dp, 1 stxvp.  */
+  vpair_f64_nfms (dest, x, y, z);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       26 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}      13 } } */
+/* { dg-final { scan-assembler-times {\mxvabsdp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M}    2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvsqrtdp\M}    2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M}     2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-2.c 
b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
new file mode 100644
index 00000000000..3030b0b3338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   vector pairs with 8 float elements.  */
+
+#include <vector-pair.h>
+
+void
+test_add (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y)
+{
+  /* 2 lxvp, 2 xvaddsp, 1 stxvp.  */
+  vpair_f32_add (dest, x, y);
+}
+
+void
+test_sub (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y)
+{
+  /* 2 lxvp, 2 xvsubsp, 1 stxvp.  */
+  vpair_f32_sub (dest, x, y);
+}
+
+void
+test_multiply (vector_pair_f32_t *dest,
+              vector_pair_f32_t *x,
+              vector_pair_f32_t *y)
+{
+  /* 2 lxvp, 2 xvmulsp, 1 stxvp.  */
+  vpair_f32_mul (dest, x, y);
+}
+
+void
+test_min (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y)
+{
+  /* 2 lxvp, 2 xvminsp, 1 stxvp.  */
+  vpair_f32_min (dest, x, y);
+}
+
+void
+test_max (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y)
+{
+  /* 2 lxvp, 2 xvmaxsp, 1 stxvp.  */
+  vpair_f32_max (dest, x, y);
+}
+
+void
+test_negate (vector_pair_f32_t *dest,
+            vector_pair_f32_t *x)
+{
+  /* 1 lxvp, 2 xvnegsp, 1 stxvp.  */
+  vpair_f32_neg (dest, x);
+}
+
+void
+test_abs (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x)
+{
+  /* 1 lxvp, 2 xvabssp, 1 stxvp.  */
+  vpair_f32_abs (dest, x);
+}
+
+void
+test_negative_abs (vector_pair_f32_t *dest,
+                  vector_pair_f32_t *x)
+{
+  /* 1 lxvp, 2 xvnabssp, 1 stxvp.  */
+  vpair_f32_nabs (dest, x);
+}
+
+void
+test_sqrt (vector_pair_f32_t *dest,
+          vector_pair_f32_t *x)
+{
+  /* 1 lxvp, 2 xvsqrtsp, 1 stxvp.  */
+  vpair_f32_sqrt (dest, x);
+}
+
+void
+test_fma (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y,
+         vector_pair_f32_t *z)
+{
+  /* 3 lxvp (x, y, z), 2 xvmadd{a,m}sp, 1 stxvp.  */
+  vpair_f32_fma (dest, x, y, z);
+}
+
+void
+test_fms (vector_pair_f32_t *dest,
+         vector_pair_f32_t *x,
+         vector_pair_f32_t *y,
+         vector_pair_f32_t *z)
+{
+  /* 3 lxvp (x, y, z), 2 xvmsub{a,m}sp, 1 stxvp.  */
+  vpair_f32_fms (dest, x, y, z);
+}
+
+void
+test_nfma (vector_pair_f32_t *dest,
+          vector_pair_f32_t *x,
+          vector_pair_f32_t *y,
+          vector_pair_f32_t *z)
+{
+  /* 3 lxvp (x, y, z), 2 xvnmadd{a,m}sp, 1 stxvp.  */
+  vpair_f32_nfma (dest, x, y, z);
+}
+
+void
+test_nfms (vector_pair_f32_t *dest,
+          vector_pair_f32_t *x,
+          vector_pair_f32_t *y,
+          vector_pair_f32_t *z)
+{
+  /* 3 lxvp (x, y, z), 2 xvnmsub{a,m}sp, 1 stxvp.  */
+  vpair_f32_nfms (dest, x, y, z);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       26 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}      13 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.sp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxsp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvminsp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.sp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M}    2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvsqrtsp\M}    2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M}     2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
new file mode 100644
index 00000000000..d1a1029417f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
@@ -0,0 +1,15 @@
+/* { dg-do run { target { vsx_hw } } } */
+/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are not
+ * available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mno-mma option disables GCC from enabling the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
new file mode 100644
index 00000000000..d78faf3fed4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
@@ -0,0 +1,14 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mmma option makes sure GCC enables the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
new file mode 100644
index 00000000000..e61ad23dd57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
@@ -0,0 +1,435 @@
+/* Common include file to test the vector pair double functions.  It is
+   compiled and run twice: once for a non-power10 system that does not have
+   the vector pair load and store instructions, and once with power10 defaults,
+   which do have load/store vector pair.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <vector-pair.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#ifndef NUM
+#define NUM    16
+#endif
+
+static double  result1[NUM];
+static double  result2[NUM];
+static double  in_a[NUM];
+static double  in_b[NUM];
+static double  in_c[NUM];
+
+/* vector pair tests.  */
+
+void
+vpair_abs (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_abs (vr + i, va + i);
+}
+
+void
+vpair_nabs (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_nabs (vr + i, va + i);
+}
+
+void
+vpair_neg (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_neg (vr + i, va + i);
+}
+
+void
+vpair_sqrt (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_sqrt (vr + i, va + i);
+}
+
+void
+vpair_add (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_add (vr + i, va + i, vb + i);
+}
+
+void
+vpair_sub (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_sub (vr + i, va + i, vb + i);
+}
+
+void
+vpair_mul (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_mul (vr + i, va + i, vb + i);
+}
+
+void
+vpair_div (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_div (vr + i, va + i, vb + i);
+}
+
+void
+vpair_min (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_min (vr + i, va + i, vb + i);
+}
+
+void
+vpair_max (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_max (vr + i, va + i, vb + i);
+}
+
+void
+vpair_fma (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *vc = (vector_pair_f64_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_fma (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_fms (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *vc = (vector_pair_f64_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_fms (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_nfma (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *vc = (vector_pair_f64_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_nfma (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_nfms (double *r, double *a, double *b, double *c, size_t num)
+{
+  vector_pair_f64_t *vr = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *va = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *vb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *vc = (vector_pair_f64_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+
+  for (i = 0; i < num2; i++)
+    vpair_f64_nfms (vr + i, va + i, vb + i, vc + i);
+}
+
+
+/* scalar tests.  */
+
+void
+scalar_abs (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < 0.0) ? -a[i] : a[i];
+}
+
+void
+scalar_nabs (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < 0.0) ? a[i] : -a[i];
+}
+
+void
+scalar_neg (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = -a[i];
+}
+
+void
+scalar_sqrt (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_sqrt (a[i]);
+}
+
+void
+scalar_add (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] + b[i];
+}
+
+void
+scalar_sub (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] - b[i];
+}
+
+void
+scalar_mul (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] * b[i];
+}
+
+void
+scalar_div (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] / b[i];
+}
+
+void
+scalar_min (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < b[i]) ? a[i] : b[i];
+}
+
+void
+scalar_max (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] > b[i]) ? a[i] : b[i];
+}
+
+void
+scalar_fma (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_fma (a[i], b[i], c[i]);
+}
+
+void
+scalar_fms (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_fma (a[i], b[i], -c[i]);
+}
+
+void
+scalar_nfma (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = - __builtin_fma (a[i], b[i], c[i]);
+}
+
+void
+scalar_nfms (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = - __builtin_fma (a[i], b[i], -c[i]);
+}
+
+
+/* Check results.  */
+void
+check (const char *name)
+{
+  size_t i;
+
+  for (i = 0; i < NUM; i++)
+    if (result1[i] != result2[i])
+      {
+#ifdef DEBUG
+       printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n",
+               (long)i,
+               result1[i],
+               result2[i],
+               name,
+               in_a[i],
+               in_b[i],
+               in_c[i]);
+#endif
+       abort ();
+      }
+
+  return;
+}
+
+typedef void func_t (double *, double *, double *, double *, size_t);
+
+/* tests to run.  */
+struct
+{
+  func_t *vpair_test;
+  func_t *scalar_test;
+  const char *name;
+} tests[] = {
+  { vpair_abs,  scalar_abs,     "abs"  }, 
+  { vpair_nabs, scalar_nabs,    "nabs" }, 
+  { vpair_neg,  scalar_neg,     "neg"  }, 
+  { vpair_sqrt, scalar_sqrt,    "sqrt" }, 
+  { vpair_add,  scalar_add,     "add"  }, 
+  { vpair_sub,  scalar_sub,     "sub"  }, 
+  { vpair_mul,  scalar_mul,     "mul"  }, 
+  { vpair_div,  scalar_div,     "div"  }, 
+  { vpair_min,  scalar_min,     "min"  }, 
+  { vpair_max,  scalar_max,     "max"  }, 
+  { vpair_fma,  scalar_fma,     "fma"  }, 
+  { vpair_fms,  scalar_fms,     "fms"  }, 
+  { vpair_nfma, scalar_nfma,    "nfma" }, 
+  { vpair_nfms, scalar_nfms,    "nfms" }, 
+};
+
+/* Run tests.  */
+
+int
+main (void)
+{
+  size_t i;
+
+  /* Initialize the inputs.  */
+  for (i = 0; i < NUM; i++)
+    {
+      double d = (double)(i + 1);
+      in_a[i] = d * d;
+      in_b[i] = d;
+      in_c[i] = d + 2.0;
+    }
+
+#ifdef DEBUG
+  printf ("Start tests\n");
+#endif
+
+  /* Run the tests.  */
+  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); i++)
+    {
+      tests[i].vpair_test  (result1, in_a, in_b, in_c, NUM);
+      tests[i].scalar_test (result2, in_a, in_b, in_c, NUM);
+      check (tests[i].name);
+    }
+
+#ifdef DEBUG
+  printf ("End tests\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c
new file mode 100644
index 00000000000..f57fbbf8b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c
@@ -0,0 +1,15 @@
+/* { dg-do run { target { vsx_hw } } } */
+/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */
+
+/*
+ * This test of the float (f32) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are not
+ * available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mno-mma option disables GCC from enabling the __vector_pair type.
+ */
+
+#include "vpair-4.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c
new file mode 100644
index 00000000000..12291202c16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c
@@ -0,0 +1,14 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */
+
+/*
+ * This test of the float (f32) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mmma option makes sure GCC enables the __vector_pair type.
+ */
+
+#include "vpair-4.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4.h b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
new file mode 100644
index 00000000000..1a80cf5e639
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
@@ -0,0 +1,435 @@
+/* Common include file to test the vector pair float functions.  It is
+   compiled and run twice: once for a non-power10 system that does not have
+   the vector pair load and store instructions, and once with power10 defaults,
+   which do have load/store vector pair.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <vector-pair.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#ifndef NUM
+#define NUM    16
+#endif
+
+static float   result1[NUM];
+static float   result2[NUM];
+static float   in_a[NUM];
+static float   in_b[NUM];
+static float   in_c[NUM];
+
+/* vector pair tests.  */
+
+void
+vpair_abs (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_abs (vr + i, va + i);
+}
+
+void
+vpair_nabs (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_nabs (vr + i, va + i);
+}
+
+void
+vpair_neg (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_neg (vr + i, va + i);
+}
+
+void
+vpair_sqrt (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_sqrt (vr + i, va + i);
+}
+
+void
+vpair_add (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_add (vr + i, va + i, vb + i);
+}
+
+void
+vpair_sub (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_sub (vr + i, va + i, vb + i);
+}
+
+void
+vpair_mul (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_mul (vr + i, va + i, vb + i);
+}
+
+void
+vpair_div (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_div (vr + i, va + i, vb + i);
+}
+
+void
+vpair_min (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_min (vr + i, va + i, vb + i);
+}
+
+void
+vpair_max (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_max (vr + i, va + i, vb + i);
+}
+
+void
+vpair_fma (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *vc = (vector_pair_f32_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_fma (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_fms (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *vc = (vector_pair_f32_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_fms (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_nfma (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *vc = (vector_pair_f32_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_nfma (vr + i, va + i, vb + i, vc + i);
+}
+
+void
+vpair_nfms (float *r, float *a, float *b, float *c, size_t num)
+{
+  vector_pair_f32_t *vr = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *va = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *vb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *vc = (vector_pair_f32_t *)c;
+
+  size_t i;
+  size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+
+  for (i = 0; i < num2; i++)
+    vpair_f32_nfms (vr + i, va + i, vb + i, vc + i);
+}
+
+
+/* scalar tests.  */
+
+void
+scalar_abs (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < 0.0) ? -a[i] : a[i];
+}
+
+void
+scalar_nabs (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < 0.0) ? a[i] : -a[i];
+}
+
+void
+scalar_neg (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = -a[i];
+}
+
+void
+scalar_sqrt (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_sqrt (a[i]);
+}
+
+void
+scalar_add (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] + b[i];
+}
+
+void
+scalar_sub (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] - b[i];
+}
+
+void
+scalar_mul (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] * b[i];
+}
+
+void
+scalar_div (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = a[i] / b[i];
+}
+
+void
+scalar_min (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] < b[i]) ? a[i] : b[i];
+}
+
+void
+scalar_max (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = (a[i] > b[i]) ? a[i] : b[i];
+}
+
+void
+scalar_fma (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_fma (a[i], b[i], c[i]);
+}
+
+void
+scalar_fms (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = __builtin_fma (a[i], b[i], -c[i]);
+}
+
+void
+scalar_nfma (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = - __builtin_fma (a[i], b[i], c[i]);
+}
+
+void
+scalar_nfms (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+
+  for (i = 0; i < num; i++)
+    r[i] = - __builtin_fma (a[i], b[i], -c[i]);
+}
+
+
+/* Check results.  */
+void
+check (const char *name)
+{
+  size_t i;
+
+  for (i = 0; i < NUM; i++)
+    if (result1[i] != result2[i])
+      {
+#ifdef DEBUG
+       printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n",
+               (long)i,
+               result1[i],
+               result2[i],
+               name,
+               in_a[i],
+               in_b[i],
+               in_c[i]);
+#endif
+       abort ();
+      }
+
+  return;
+}
+
+typedef void func_t (float *, float *, float *, float *, size_t);
+
+/* tests to run.  */
+struct
+{
+  func_t *vpair_test;
+  func_t *scalar_test;
+  const char *name;
+} tests[] = {
+  { vpair_abs,  scalar_abs,     "abs"  }, 
+  { vpair_nabs, scalar_nabs,    "nabs" }, 
+  { vpair_neg,  scalar_neg,     "neg"  }, 
+  { vpair_sqrt, scalar_sqrt,    "sqrt" }, 
+  { vpair_add,  scalar_add,     "add"  }, 
+  { vpair_sub,  scalar_sub,     "sub"  }, 
+  { vpair_mul,  scalar_mul,     "mul"  }, 
+  { vpair_div,  scalar_div,     "div"  }, 
+  { vpair_min,  scalar_min,     "min"  }, 
+  { vpair_max,  scalar_max,     "max"  }, 
+  { vpair_fma,  scalar_fma,     "fma"  }, 
+  { vpair_fms,  scalar_fms,     "fms"  }, 
+  { vpair_nfma, scalar_nfma,    "nfma" }, 
+  { vpair_nfms, scalar_nfms,    "nfms" }, 
+};
+
+/* Run tests.  */
+
+int
+main (void)
+{
+  size_t i;
+
+  /* Initialize the inputs.  */
+  for (i = 0; i < NUM; i++)
+    {
+      float f = (float)(i + 1);
+      in_a[i] = f * f;
+      in_b[i] = f;
+      in_c[i] = f + 2.0f;
+    }
+
+#ifdef DEBUG
+  printf ("Start tests\n");
+#endif
+
+  /* Run the tests.  */
+  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); i++)
+    {
+      tests[i].vpair_test  (result1, in_a, in_b, in_c, NUM);
+      tests[i].scalar_test (result2, in_a, in_b, in_c, NUM);
+      check (tests[i].name);
+    }
+
+#ifdef DEBUG
+  printf ("End tests\n");
+#endif
+
+  return 0;
+}
-- 
2.46.2



-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com

Reply via email to