https://gcc.gnu.org/g:4362442ef3a8738a62ba36dfab2829705de71044

commit 4362442ef3a8738a62ba36dfab2829705de71044
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Wed Sep 4 22:18:39 2024 -0400

    Initial vector-pair.h support
    
    2024-09-03  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers.
            * config/rs6000/rs6000.cc (print_operand): Add %S output modifier.
            * config/rs6000/vector-pair.h: New file.
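
    For illustration only (not part of this commit): a minimal sketch of how
    the vpair_* macros might be used from user code.  The function and
    variable names below are hypothetical, and the example assumes it is
    compiled with VSX enabled.  The macros take pointers to vector_pair_t, so
    the same calls work both with the opaque __vector_pair type on power10
    (-mmma) and with the union fallback on power8/power9.

    /* Hypothetical example: scale an array of doubles four at a time,
       x[i] *= s.  Assumes n is a multiple of 4.  */
    #include <string.h>
    #include <vector-pair.h>

    void
    scale_f64 (double *x, double s, unsigned long n)
    {
      vector_pair_t vs, vx, vr;

      vpair_f64_splat (&vs, s);              /* Replicate s across the pair.  */

      for (unsigned long i = 0; i < n; i += 4)
        {
          memcpy (&vx, x + i, sizeof (vx));  /* Load 4 doubles.  */
          vpair_f64_mul (&vr, &vx, &vs);     /* Multiply element-wise by s.  */
          memcpy (x + i, &vr, sizeof (vr));  /* Store the 4 results.  */
        }
    }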

Diff:
---
 gcc/config.gcc                  |   2 +-
 gcc/config/rs6000/rs6000.cc     |  46 +++++
 gcc/config/rs6000/vector-pair.h | 430 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 477 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0b794e977f6..3627bed8b86 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -537,7 +537,7 @@ powerpc*-*-*)
        extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h"
        extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h"
        extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
-       extra_headers="${extra_headers} amo.h"
+       extra_headers="${extra_headers} amo.h vector-pair.h"
        case x$with_cpu in
            xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
                cpu_is_64bit=yes
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f320d1762c3..fdcda403672 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -14414,6 +14414,52 @@ print_operand (FILE *file, rtx x, int code)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
       return;
 
+    case 'S':
+      /* Like %L<x>, but assume the second register is a VSX register.  This
+        works on VSX registers and memory addresses.  */
+      if (REG_P (x))
+       {
+         int reg = REGNO (x);
+         if (!VSX_REGNO_P (reg) || (reg & 1) != 0)
+           output_operand_lossage ("invalid %%S value");
+         else
+           {
+             int vsx_reg = (FP_REGNO_P (reg)
+                            ? reg - 32
+                            : reg - FIRST_ALTIVEC_REGNO + 32) + 1;
+
+#ifdef TARGET_REGNAMES      
+             if (TARGET_REGNAMES)
+               fprintf (file, "%%vs%d", vsx_reg);
+             else
+#endif
+               fprintf (file, "%d", vsx_reg);
+           }
+       }
+
+      else if (MEM_P (x))
+       {
+         machine_mode mode = GET_MODE (x);
+	  /* Vectors and vector pairs can't have auto-increment addresses.  */
+         if (GET_CODE (XEXP (x, 0)) == PRE_INC
+             || GET_CODE (XEXP (x, 0)) == PRE_DEC
+             || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+           output_operand_lossage ("invalid auto-increment %%S value");
+         else
+           output_address (mode, XEXP (adjust_address_nv (x, SImode,
+                                                          UNITS_PER_WORD),
+                                 0));
+
+         if (small_data_operand (x, GET_MODE (x)))
+           fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+                    reg_names[SMALL_DATA_REG]);
+       }
+
+      else
+       output_operand_lossage ("invalid %%S value");
+
+      return;
+
     case 't':
       /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
       if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
new file mode 100644
index 00000000000..91725ef43ec
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.h
@@ -0,0 +1,430 @@
+/* PowerPC vector pair include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   Contributed by Aldy Hernandez (al...@redhat.com).
+   Rewritten by Paolo Bonzini (bonz...@gnu.org).
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Provide support for vector pairs, even on systems that do not have native
+   support for loading and storing pairs of vectors.  */
+
+#ifndef _VECTOR_PAIR_H
+#define _VECTOR_PAIR_H 1
+
+/* If we have MMA support, use power10 support.  */
+#if __MMA__
+typedef __vector_pair vector_pair_t;
+
+#define VPAIR_FP_CONSTRAINT    "wa"            /* Allow all VSX registers.  */
+#define VPAIR_FP_SECOND		"S"		/* Access 2nd VSX register.  */
+
+/* vector pair double operations on power10.  */
+#define vpair_f64_splat(R, A)                                          \
+  __asm__ ("xxpermdi %x0,%x1,%x1,0" "\n\t"                             \
+           "xxpermdi %" VPAIR_FP_SECOND "0,%x1,%x1,0"                  \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : "wa" ((A)))
+
+#define vpair_f64_neg(R,A)                                             \
+  __asm__ ("xvnegdp %x0,%x1" "\n\t"                                    \
+           "xvnegdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"       \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f64_abs(R,A)                                             \
+  __asm__ ("xvabsdp %x0,%x1" "\n\t"                                    \
+           "xvabsdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"       \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f64_nabs(R,A)                                            \
+  __asm__ ("xvnabsdp %x0,%x1" "\n\t"                                   \
+           "xvnabsdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"      \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f64_sqrt(R,A)                                            \
+  __asm__ ("xvsqrtdp %x0,%x1" "\n\t"                                   \
+           "xvsqrtdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"      \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f64_add(R,A,B)                                           \
+  __asm__ ("xvadddp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvadddp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_div(R,A,B)                                           \
+  __asm__ ("xvdivdp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvdivdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_max(R,A,B)                                           \
+  __asm__ ("xvmaxdp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvmaxdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_min(R,A,B)                                           \
+  __asm__ ("xvmindp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvmindp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_mul(R,A,B)                                           \
+  __asm__ ("xvmuldp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvmuldp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_sub(R,A,B)                                           \
+  __asm__ ("xvsubdp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvsubdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f64_fma(R,A,B,C)                                         \
+  __asm__ ("xvmaddadp %x0,%x1,%x2" "\n\t"                              \
+           "xvmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f64_fms(R,A,B,C)                                         \
+  __asm__ ("xvmsubadp %x0,%x1,%x2" "\n\t"                              \
+           "xvmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f64_nfma(R,A,B,C)						\
+  __asm__ ("xvnmaddadp %x0,%x1,%x2" "\n\t"				\
+           "xvnmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f64_nfms(R,A,B,C)						\
+  __asm__ ("xvnmsubadp %x0,%x1,%x2" "\n\t"				\
+           "xvnmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+/* vector pair float operations on power10.  */
+#define vpair_f32_splat(R, A)                                          \
+  __asm__ ("xscvdpspn %x0,%x1" "\n\t"                                  \
+           "xxspltw %x0,%x0,0" "\n\t"                                  \
+           "xxlor %" VPAIR_FP_SECOND "0,%x0,%x0"                       \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : "wa" (((float) (A))))
+
+#define vpair_f32_neg(R,A)                                             \
+  __asm__ ("xvnegsp %x0,%x1" "\n\t"                                    \
+           "xvnegsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"       \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f32_abs(R,A)                                             \
+  __asm__ ("xvabssp %x0,%x1" "\n\t"                                    \
+           "xvabssp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"       \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f32_nabs(R,A)                                            \
+  __asm__ ("xvnabssp %x0,%x1" "\n\t"                                   \
+           "xvnabssp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"      \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f32_sqrt(R,A)                                            \
+  __asm__ ("xvsqrtsp %x0,%x1" "\n\t"                                   \
+           "xvsqrtsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1"      \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)))
+
+#define vpair_f32_add(R,A,B)                                           \
+  __asm__ ("xvaddsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvaddsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_div(R,A,B)                                           \
+  __asm__ ("xvdivsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvdivsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_max(R,A,B)                                           \
+  __asm__ ("xvmaxsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvmaxsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_min(R,A,B)                                           \
+  __asm__ ("xvminsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvminsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_mul(R,A,B)                                           \
+  __asm__ ("xvmulsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvmulsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_sub(R,A,B)                                           \
+  __asm__ ("xvsubsp %x0,%x1,%x2" "\n\t"                                        
\
+           "xvsubsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)))
+
+#define vpair_f32_fma(R,A,B,C)                                         \
+  __asm__ ("xvmaddasp %x0,%x1,%x2" "\n\t"                              \
+           "xvmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f32_fms(R,A,B,C)                                         \
+  __asm__ ("xvmsubasp %x0,%x1,%x2" "\n\t"                              \
+           "xvmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f32_nfma(R,A,B,C)						\
+  __asm__ ("xvnmaddasp %x0,%x1,%x2" "\n\t"				\
+           "xvnmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#define vpair_f32_nfms(R,A,B,C)						\
+  __asm__ ("xvnmsubasp %x0,%x1,%x2" "\n\t"				\
+           "xvnmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \
+           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
+          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+
+#else  /* !__MMA__.  */
+typedef union {
+  /* Double vector pairs.  */
+  double __attribute__((__vector_size__(32))) __vpair_vp_f64;
+  vector double __vpair_vec_f64[2];
+  double __vpair_scalar_f64[4];
+
+  /* Float vector pairs.  */
+  float  __attribute__((__vector_size__(32))) __vpair_vp_f32;
+  vector float  __vpair_vec_f32[2];
+  float __vpair_scalar_f32[8];
+
+} vector_pair_t;
+
+#define VPAIR_FP_CONSTRAINT    "d"             /* Only use FPR registers.  */
+#define VPAIR_FP_SECOND		"L"		/* Access 2nd FPR register.  */
+
+/* vector pair double operations on power8/power9.  */
+#define vpair_f64_splat(R,A)                                           \
+  ((R)->__vpair_vec_f64[0] = (R)->__vpair_vec_f64[1]                   \
+   = __builtin_vec_splats ((double) (A)))
+
+#define vpair_f64_neg(R,A)                                             \
+  ((R)->__vpair_vp_f64 = - (A)->__vpair_vp_f64)
+
+#define vpair_f64_abs(R,A)                                             \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvabsdp ((A)->__vpair_vec_f64[0])),		\
+   ((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvabsdp ((A)->__vpair_vec_f64[1])))
+
+#define vpair_f64_nabs(R,A)                                            \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvnabsdp ((A)->__vpair_vec_f64[0])),               \
+   ((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvnabsdp ((A)->__vpair_vec_f64[1])))
+
+#define vpair_f64_sqrt(R,A)                                            \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvsqrtdp ((A)->__vpair_vec_f64[0])),               \
+   ((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvsqrtdp ((A)->__vpair_vec_f64[1])))
+
+#define vpair_f64_add(R,A,B)                                           \
+  ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 + (B)->__vpair_vp_f64)
+
+#define vpair_f64_div(R,A,B)                                           \
+  ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 / (B)->__vpair_vp_f64)
+
+#define vpair_f64_max(R,A,B)                                           \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvmaxdp ((A)->__vpair_vec_f64[0],                  \
+                            (B)->__vpair_vec_f64[0])),                 \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvmaxdp ((A)->__vpair_vec_f64[1],                  \
+                            (B)->__vpair_vec_f64[1]))))
+
+#define vpair_f64_min(R,A,B)                                           \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvmindp ((A)->__vpair_vec_f64[0],                  \
+                            (B)->__vpair_vec_f64[0])),                 \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvmindp ((A)->__vpair_vec_f64[1],                  \
+                            (B)->__vpair_vec_f64[1]))))
+
+#define vpair_f64_mul(R,A,B)                                           \
+  ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 * (B)->__vpair_vp_f64)
+
+#define vpair_f64_sub(R,A,B)                                           \
+  ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 - (B)->__vpair_vp_f64)
+
+#define vpair_f64_fma(R,A,B,C)                                         \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvmadddp ((A)->__vpair_vec_f64[0],                 \
+                              (B)->__vpair_vec_f64[0],                 \
+                             (C)->__vpair_vec_f64[0])),                \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvmadddp ((A)->__vpair_vec_f64[1],                 \
+                              (B)->__vpair_vec_f64[1],                 \
+                             (C)->__vpair_vec_f64[1]))))
+
+#define vpair_f64_fms(R,A,B,C)                                         \
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvmsubdp ((A)->__vpair_vec_f64[0],                 \
+                              (B)->__vpair_vec_f64[0],                 \
+                             (C)->__vpair_vec_f64[0])),                \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvmsubdp ((A)->__vpair_vec_f64[1],                 \
+                              (B)->__vpair_vec_f64[1],                 \
+                             (C)->__vpair_vec_f64[1]))))
+
+#define vpair_f64_nfma(R,A,B,C)						\
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvnmadddp ((A)->__vpair_vec_f64[0],		\
+                              (B)->__vpair_vec_f64[0],                 \
+                              (C)->__vpair_vec_f64[0])),               \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvnmadddp ((A)->__vpair_vec_f64[1],		\
+                              (B)->__vpair_vec_f64[1],                 \
+                              (C)->__vpair_vec_f64[1]))))
+
+#define vpair_f64_nfms(R,A,B,C)						\
+  (((R)->__vpair_vec_f64[0]                                            \
+    = __builtin_vsx_xvnmsubdp ((A)->__vpair_vec_f64[0],		\
+                              (B)->__vpair_vec_f64[0],                 \
+                              (C)->__vpair_vec_f64[0])),               \
+  (((R)->__vpair_vec_f64[1]                                            \
+    = __builtin_vsx_xvnmsubdp ((A)->__vpair_vec_f64[1],		\
+                              (B)->__vpair_vec_f64[1],                 \
+                              (C)->__vpair_vec_f64[1]))))
+
+/* vector pair float operations on power8/power9.  */
+#define vpair_f32_splat(R,A)                                           \
+  ((R)->__vpair_vec_f32[0] = (R)->__vpair_vec_f32[1]                   \
+   = __builtin_vec_splats ((float) (A)))
+
+#define vpair_f32_neg(R,A)                                             \
+  ((R)->__vpair_vp_f32 = - (A)->__vpair_vp_f32)
+
+#define vpair_f32_abs(R,A)                                             \
+  (((R)->__vpair_vec_f32[0]						\
+    = __builtin_vsx_xvabssp ((A)->__vpair_vec_f32[0])),		\
+   ((R)->__vpair_vec_f32[1]						\
+    = __builtin_vsx_xvabssp ((A)->__vpair_vec_f32[1])))
+
+#define vpair_f32_nabs(R,A)                                            \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvnabssp ((A)->__vpair_vec_f32[0])),               \
+   ((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvnabssp ((A)->__vpair_vec_f32[1])))
+
+#define vpair_f32_sqrt(R,A)                                            \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvsqrtsp ((A)->__vpair_vec_f32[0])),               \
+   ((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvsqrtsp ((A)->__vpair_vec_f32[1])))
+
+#define vpair_f32_add(R,A,B)                                           \
+  ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 + (B)->__vpair_vp_f32)
+
+#define vpair_f32_div(R,A,B)                                           \
+  ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 / (B)->__vpair_vp_f32)
+
+#define vpair_f32_max(R,A,B)                                           \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvmaxsp ((A)->__vpair_vec_f32[0],                  \
+                            (B)->__vpair_vec_f32[0])),                 \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvmaxsp ((A)->__vpair_vec_f32[1],                  \
+                            (B)->__vpair_vec_f32[1]))))
+
+#define vpair_f32_min(R,A,B)                                           \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvminsp ((A)->__vpair_vec_f32[0],                  \
+                            (B)->__vpair_vec_f32[0])),                 \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvminsp ((A)->__vpair_vec_f32[1],                  \
+                            (B)->__vpair_vec_f32[1]))))
+
+#define vpair_f32_mul(R,A,B)                                           \
+  ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 * (B)->__vpair_vp_f32)
+
+#define vpair_f32_sub(R,A,B)                                           \
+  ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 - (B)->__vpair_vp_f32)
+
+#define vpair_f32_fma(R,A,B,C)                                         \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvmaddsp ((A)->__vpair_vec_f32[0],                 \
+                              (B)->__vpair_vec_f32[0],                 \
+                             (C)->__vpair_vec_f32[0])),                \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvmaddsp ((A)->__vpair_vec_f32[1],                 \
+                              (B)->__vpair_vec_f32[1],                 \
+                             (C)->__vpair_vec_f32[1]))))
+
+#define vpair_f32_fms(R,A,B,C)                                         \
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvmsubsp ((A)->__vpair_vec_f32[0],                 \
+                              (B)->__vpair_vec_f32[0],                 \
+                             (C)->__vpair_vec_f32[0])),                \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvmsubsp ((A)->__vpair_vec_f32[1],                 \
+                              (B)->__vpair_vec_f32[1],                 \
+                             (C)->__vpair_vec_f32[1]))))
+
+#define vpair_f32_nfma(R,A,B,C)						\
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvnmaddsp ((A)->__vpair_vec_f32[0],		\
+                              (B)->__vpair_vec_f32[0],                 \
+                              (C)->__vpair_vec_f32[0])),               \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvnmaddsp ((A)->__vpair_vec_f32[1],		\
+                              (B)->__vpair_vec_f32[1],                 \
+                              (C)->__vpair_vec_f32[1]))))
+
+#define vpair_f32_nfms(R,A,B,C)						\
+  (((R)->__vpair_vec_f32[0]                                            \
+    = __builtin_vsx_xvnmsubsp ((A)->__vpair_vec_f32[0],		\
+                              (B)->__vpair_vec_f32[0],                 \
+                              (C)->__vpair_vec_f32[0])),               \
+  (((R)->__vpair_vec_f32[1]                                            \
+    = __builtin_vsx_xvnmsubsp ((A)->__vpair_vec_f32[1],		\
+                              (B)->__vpair_vec_f32[1],                 \
+                              (C)->__vpair_vec_f32[1]))))
+
+#endif /* __MMA__.  */
+
+#endif /* _VECTOR_PAIR_H.  */
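
For illustration only: the new %S output modifier added to print_operand
above names the second VSX register of a vector-pair operand, much as %L
names the second register of a multi-word operand.  Below is a hedged sketch
of using it directly in inline asm (power10 with -mmma; the function name is
hypothetical), mirroring the header's vpair_f64_neg macro:

/* Negate both halves of a __vector_pair.  %x0 prints the first VSX
   register of the pair, %S0 prints the second.  */
__vector_pair
vpair_negate_f64 (__vector_pair x)
{
  __vector_pair r;
  __asm__ ("xvnegdp %x0,%x1\n\t"
           "xvnegdp %S0,%S1"
           : "=wa" (r)
           : "wa" (x));
  return r;
}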
