https://gcc.gnu.org/g:4362442ef3a8738a62ba36dfab2829705de71044
commit 4362442ef3a8738a62ba36dfab2829705de71044 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed Sep 4 22:18:39 2024 -0400 Initial vector-pair.h support 2024-09-03 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers. * config/rs6000/rs6000.cc (print_operand): Add %S output modifier. * config/rs6000/vector-pair.h: New file. Diff: --- gcc/config.gcc | 2 +- gcc/config/rs6000/rs6000.cc | 46 +++++ gcc/config/rs6000/vector-pair.h | 430 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+), 1 deletion(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index 0b794e977f6..3627bed8b86 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -537,7 +537,7 @@ powerpc*-*-*) extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h" extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h" extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h" - extra_headers="${extra_headers} amo.h" + extra_headers="${extra_headers} amo.h vector-pair.h" case x$with_cpu in xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture) cpu_is_64bit=yes diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index f320d1762c3..fdcda403672 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -14414,6 +14414,52 @@ print_operand (FILE *file, rtx x, int code) fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31); return; + case 'S': + /* Like %L<x>, but assume the second register is a VSX register. This + works on VSX registers and memory addresses. */ + if (REG_P (x)) + { + int reg = REGNO (x); + if (!VSX_REGNO_P (reg) || (reg & 1) != 0) + output_operand_lossage ("invalid %%S value"); + else + { + int vsx_reg = (FP_REGNO_P (reg) + ? 
reg - 32 + : reg - FIRST_ALTIVEC_REGNO + 32) + 1; + +#ifdef TARGET_REGNAMES + if (TARGET_REGNAMES) + fprintf (file, "%%vs%d", vsx_reg); + else +#endif + fprintf (file, "%d", vsx_reg); + } + } + + else if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + /* Vectors and vector pairs can't have auto increment addresses. */ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC + || GET_CODE (XEXP (x, 0)) == PRE_MODIFY) + output_operand_lossage ("invalid auto-increment %%S value"); + else + output_address (mode, XEXP (adjust_address_nv (x, SImode, + UNITS_PER_WORD), + 0)); + + if (small_data_operand (x, GET_MODE (x))) + fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, + reg_names[SMALL_DATA_REG]); + } + + else + output_operand_lossage ("invalid %%S value"); + + return; + case 't': /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */ + if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h new file mode 100644 index 00000000000..91725ef43ec --- /dev/null +++ b/gcc/config/rs6000/vector-pair.h @@ -0,0 +1,430 @@ +/* PowerPC vector pair include file. + Copyright (C) 2024 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (al...@redhat.com). + Rewritten by Paolo Bonzini (bonz...@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Provide support for vector pairs, even on systems that do not have native + support for loading and storing pairs of vectors. */ + +#ifndef _VECTOR_PAIR_H +#define _VECTOR_PAIR_H 1 + +/* If we have MMA support, use power10 support. */ +#if __MMA__ +typedef __vector_pair vector_pair_t; + +#define VPAIR_FP_CONSTRAINT "wa" /* Allow all VSX registers. */ +#define VPAIR_FP_SECOND "S" /* Access 2nd VSX register. */ + +/* vector pair double operations on power10. */ +#define vpair_f64_splat(R, A) \ + __asm__ ("xxpermdi %x0,%x1,%x1,0" "\n\t" \ + "xxpermdi %" VPAIR_FP_SECOND "0,%x1,%x1,0" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : "wa" ((A))) + +#define vpair_f64_neg(R,A) \ + __asm__ ("xvnegdp %x0,%x1" "\n\t" \ + "xvnegdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f64_abs(R,A) \ + __asm__ ("xvabsdp %x0,%x1" "\n\t" \ + "xvabsdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f64_nabs(R,A) \ + __asm__ ("xvnabsdp %x0,%x1" "\n\t" \ + "xvnabsdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f64_sqrt(R,A) \ + __asm__ ("xvsqrtdp %x0,%x1" "\n\t" \ + "xvsqrtdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f64_add(R,A,B) \ + __asm__ ("xvadddp %x0,%x1,%x2" "\n\t" \ + "xvadddp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_div(R,A,B) \ + __asm__ ("xvdivdp 
%x0,%x1,%x2" "\n\t" \ + "xvdivdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_max(R,A,B) \ + __asm__ ("xvmaxdp %x0,%x1,%x2" "\n\t" \ + "xvmaxdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_min(R,A,B) \ + __asm__ ("xvmindp %x0,%x1,%x2" "\n\t" \ + "xvmindp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_mul(R,A,B) \ + __asm__ ("xvmuldp %x0,%x1,%x2" "\n\t" \ + "xvmuldp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_sub(R,A,B) \ + __asm__ ("xvsubdp %x0,%x1,%x2" "\n\t" \ + "xvsubdp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f64_fma(R,A,B,C) \ + __asm__ ("xvmaddadp %x0,%x1,%x2" "\n\t" \ + "xvmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#define vpair_f64_fms(R,A,B,C) \ + __asm__ ("xvmsubadp %x0,%x1,%x2" "\n\t" \ + "xvmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#define vpair_f64_nfma(R,A,B,C) \ + __asm__ ("xvnmaddadp %x0,%x1,%x2" "\n\t" \ + "xvnmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), 
"0" ((*C))) + +#define vpair_f64_nfms(R,A,B,C) \ + __asm__ ("xvnmsubadp %x0,%x1,%x2" "\n\t" \ + "xvnmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +/* vector pair float operations on power10. */ +#define vpair_f32_splat(R, A) \ + __asm__ ("xscvdpspn %x0,%x1" "\n\t" \ + "xxspltw %x0,%x0,0" "\n\t" \ + "xxlor %" VPAIR_FP_SECOND "0,%x0,%x0" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : "wa" (((float) (A)))) + +#define vpair_f32_neg(R,A) \ + __asm__ ("xvnegsp %x0,%x1" "\n\t" \ + "xvnegsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f32_abs(R,A) \ + __asm__ ("xvabssp %x0,%x1" "\n\t" \ + "xvabssp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f32_nabs(R,A) \ + __asm__ ("xvnabssp %x0,%x1" "\n\t" \ + "xvnabssp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f32_sqrt(R,A) \ + __asm__ ("xvsqrtsp %x0,%x1" "\n\t" \ + "xvsqrtsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A))) + +#define vpair_f32_add(R,A,B) \ + __asm__ ("xvaddsp %x0,%x1,%x2" "\n\t" \ + "xvaddsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_div(R,A,B) \ + __asm__ ("xvdivsp %x0,%x1,%x2" "\n\t" \ + "xvdivsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_max(R,A,B) \ + __asm__ ("xvmaxsp %x0,%x1,%x2" "\n\t" \ + "xvmaxsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" 
VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_min(R,A,B) \ + __asm__ ("xvminsp %x0,%x1,%x2" "\n\t" \ + "xvminsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_mul(R,A,B) \ + __asm__ ("xvmulsp %x0,%x1,%x2" "\n\t" \ + "xvmulsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_sub(R,A,B) \ + __asm__ ("xvsubsp %x0,%x1,%x2" "\n\t" \ + "xvsubsp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B))) + +#define vpair_f32_fma(R,A,B,C) \ + __asm__ ("xvmaddasp %x0,%x1,%x2" "\n\t" \ + "xvmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#define vpair_f32_fms(R,A,B,C) \ + __asm__ ("xvmsubasp %x0,%x1,%x2" "\n\t" \ + "xvmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#define vpair_f32_nfma(R,A,B,C) \ + __asm__ ("xvnmaddasp %x0,%x1,%x2" "\n\t" \ + "xvnmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#define vpair_f32_nfms(R,A,B,C) \ + __asm__ ("xvnmsubasp %x0,%x1,%x2" "\n\t" \ + "xvnmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ + : "=" VPAIR_FP_CONSTRAINT (*R) \ + : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) + +#else /* !__MMA__. */ +typedef union { + /* Double vector pairs. 
*/ + double __attribute__((__vector_size__(32))) __vpair_vp_f64; + vector double __vpair_vec_f64[2]; + double __vpair_scalar_f64[4]; + + /* Float vector pairs. */ + float __attribute__((__vector_size__(32))) __vpair_vp_f32; + vector float __vpair_vec_f32[2]; + float __vpair_scalar_f32[8]; + +} vector_pair_t; + +#define VPAIR_FP_CONSTRAINT "d" /* Only use FPR registers. */ +#define VPAIR_FP_SECOND "L" /* Access 2nd FPR register. */ + +/* vector pair double operations on power8/power9. */ +#define vpair_f64_splat(R,A) \ + ((R)->__vpair_vec_f64[0] = (R)->__vpair_vec_f64[1] \ + = __builtin_vec_splats ((double) (A))) + +#define vpair_f64_neg(R,A) \ + ((R)->__vpair_vp_f64 = - (A)->__vpair_vp_f64) + +#define vpair_f64_abs(R,A) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvabsdp ((A)->__vpair_vec_f64[0])), \ + ((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvabsdp ((A)->__vpair_vec_f64[1]))) + +#define vpair_f64_nabs(R,A) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvnabsdp ((A)->__vpair_vec_f64[0])), \ + ((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvnabsdp ((A)->__vpair_vec_f64[1]))) + +#define vpair_f64_sqrt(R,A) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvsqrtdp ((A)->__vpair_vec_f64[0])), \ + ((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvsqrtdp ((A)->__vpair_vec_f64[1]))) + +#define vpair_f64_add(R,A,B) \ + ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 + (B)->__vpair_vp_f64) + +#define vpair_f64_div(R,A,B) \ + ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 / (B)->__vpair_vp_f64) + +#define vpair_f64_max(R,A,B) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvmaxdp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvmaxdp ((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1])))) + +#define vpair_f64_min(R,A,B) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvmindp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvmindp 
((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1])))) + +#define vpair_f64_mul(R,A,B) \ + ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 * (B)->__vpair_vp_f64) + +#define vpair_f64_sub(R,A,B) \ + ((R)->__vpair_vp_f64 = (A)->__vpair_vp_f64 - (B)->__vpair_vp_f64) + +#define vpair_f64_fma(R,A,B,C) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvmadddp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0], \ + (C)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvmadddp ((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1], \ + (C)->__vpair_vec_f64[1])))) + +#define vpair_f64_fms(R,A,B,C) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvmsubdp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0], \ + (C)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvmsubdp ((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1], \ + (C)->__vpair_vec_f64[1])))) + +#define vpair_f64_nfma(R,A,B,C) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvnmadddp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0], \ + (C)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvnmadddp ((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1], \ + (C)->__vpair_vec_f64[1])))) + +#define vpair_f64_nfms(R,A,B,C) \ + (((R)->__vpair_vec_f64[0] \ + = __builtin_vsx_xvnmsubdp ((A)->__vpair_vec_f64[0], \ + (B)->__vpair_vec_f64[0], \ + (C)->__vpair_vec_f64[0])), \ + (((R)->__vpair_vec_f64[1] \ + = __builtin_vsx_xvnmsubdp ((A)->__vpair_vec_f64[1], \ + (B)->__vpair_vec_f64[1], \ + (C)->__vpair_vec_f64[1])))) + +/* vector pair float operations on power8/power9. 
*/ +#define vpair_f32_splat(R,A) \ + ((R)->__vpair_vec_f32[0] = (R)->__vpair_vec_f32[1] \ + = __builtin_vec_splats ((float) (A))) + +#define vpair_f32_neg(R,A) \ + ((R)->__vpair_vp_f32 = - (A)->__vpair_vp_f32) + +#define vpair_f32_abs(R,A) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvabssp ((A)->__vpair_vec_f32[0])), \ + ((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvabssp ((A)->__vpair_vec_f32[1]))) + +#define vpair_f32_nabs(R,A) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvnabssp ((A)->__vpair_vec_f32[0])), \ + ((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvnabssp ((A)->__vpair_vec_f32[1]))) + +#define vpair_f32_sqrt(R,A) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvsqrtsp ((A)->__vpair_vec_f32[0])), \ + ((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvsqrtsp ((A)->__vpair_vec_f32[1]))) + +#define vpair_f32_add(R,A,B) \ + ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 + (B)->__vpair_vp_f32) + +#define vpair_f32_div(R,A,B) \ + ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 / (B)->__vpair_vp_f32) + +#define vpair_f32_max(R,A,B) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvmaxsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvmaxsp ((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1])))) + +#define vpair_f32_min(R,A,B) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvminsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvminsp ((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1])))) + +#define vpair_f32_mul(R,A,B) \ + ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 * (B)->__vpair_vp_f32) + +#define vpair_f32_sub(R,A,B) \ + ((R)->__vpair_vp_f32 = (A)->__vpair_vp_f32 - (B)->__vpair_vp_f32) + +#define vpair_f32_fma(R,A,B,C) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvmaddsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0], \ + (C)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvmaddsp 
((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1], \ + (C)->__vpair_vec_f32[1])))) + +#define vpair_f32_fms(R,A,B,C) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvmsubsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0], \ + (C)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvmsubsp ((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1], \ + (C)->__vpair_vec_f32[1])))) + +#define vpair_f32_nfma(R,A,B,C) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvnmaddsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0], \ + (C)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvnmaddsp ((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1], \ + (C)->__vpair_vec_f32[1])))) + +#define vpair_f32_nfms(R,A,B,C) \ + (((R)->__vpair_vec_f32[0] \ + = __builtin_vsx_xvnmsubsp ((A)->__vpair_vec_f32[0], \ + (B)->__vpair_vec_f32[0], \ + (C)->__vpair_vec_f32[0])), \ + (((R)->__vpair_vec_f32[1] \ + = __builtin_vsx_xvnmsubsp ((A)->__vpair_vec_f32[1], \ + (B)->__vpair_vec_f32[1], \ + (C)->__vpair_vec_f32[1])))) + +#endif /* __MMA__. */ + +#endif /* _VECTOR_PAIR_H. */