https://gcc.gnu.org/g:875555003006531164e1c59b6e5c4751ce38e396
commit 875555003006531164e1c59b6e5c4751ce38e396 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Fri Sep 13 04:12:28 2024 -0400 Initial vector-pair.h support 2024-09-13 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers. * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): On systems with vector pair support, define __VPAIR__. * config/rs6000/vector-pair.h: New file. Diff: --- gcc/config.gcc | 2 +- gcc/config/rs6000/rs6000-c.cc | 8 +- gcc/config/rs6000/vector-pair.h | 612 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 619 insertions(+), 3 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index 0b794e977f6a..3627bed8b863 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -537,7 +537,7 @@ powerpc*-*-*) extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h" extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h" extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h" - extra_headers="${extra_headers} amo.h" + extra_headers="${extra_headers} amo.h vector-pair.h" case x$with_cpu in xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture) cpu_is_64bit=yes diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 82826f96a8e7..2d674f9b2369 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -590,9 +590,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, if (rs6000_cpu == PROCESSOR_CELL) rs6000_define_or_undefine_macro (define_p, "__PPU__"); - /* Tell the user if we support the MMA instructions. */ + /* Tell the user if we support the MMA instructions. Also say that we + support the vector pair built-in functions. 
*/ if ((flags & OPTION_MASK_MMA) != 0) - rs6000_define_or_undefine_macro (define_p, "__MMA__"); + { + rs6000_define_or_undefine_macro (define_p, "__MMA__"); + rs6000_define_or_undefine_macro (define_p, "__VPAIR__"); + } /* Whether pc-relative code is being generated. */ if ((flags & OPTION_MASK_PCREL) != 0) rs6000_define_or_undefine_macro (define_p, "__PCREL__"); diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h new file mode 100644 index 000000000000..ebfaaa1e8a0c --- /dev/null +++ b/gcc/config/rs6000/vector-pair.h @@ -0,0 +1,612 @@ +/* PowerPC vector pair include file. + Copyright (C) 2024 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (al...@redhat.com). + Rewritten by Paolo Bonzini (bonz...@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Provide support for vector pairs, even on systems that do not have native + support for loading and storing pairs of vectors. */ + +#ifndef _VECTOR_PAIR_H +#define _VECTOR_PAIR_H 1 + +/* During testing, allow vector-pair.h to be included multiple times. 
*/
+#undef vector_pair_t
+#undef vector_pair_f64_t
+#undef vector_pair_f32_t
+
+#undef vpair_f64_abs
+#undef vpair_f64_add
+#undef vpair_f64_div
+#undef vpair_f64_fma
+#undef vpair_f64_fms
+#undef vpair_f64_max
+#undef vpair_f64_min
+#undef vpair_f64_mul
+#undef vpair_f64_nabs
+#undef vpair_f64_neg
+#undef vpair_f64_nfma
+#undef vpair_f64_nfms
+#undef vpair_f64_splat
+#undef vpair_f64_sqrt
+#undef vpair_f64_sub
+
+#undef vpair_f32_abs
+#undef vpair_f32_add
+#undef vpair_f32_div
+#undef vpair_f32_fma
+#undef vpair_f32_fms
+#undef vpair_f32_max
+#undef vpair_f32_min
+#undef vpair_f32_mul
+#undef vpair_f32_nabs
+#undef vpair_f32_neg
+#undef vpair_f32_nfma
+#undef vpair_f32_nfms
+#undef vpair_f32_splat
+#undef vpair_f32_sqrt
+#undef vpair_f32_sub
+/* Pick an implementation unless the includer already set one of the three
+   selector macros to a non-zero value: the vector pair built-ins when both
+   __MMA__ and __VPAIR__ are set, inline asm on __vector_pair when only
+   __MMA__ is set, and the union-based fallback otherwise.  */
+#if !__VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__
+#if __MMA__ && __VPAIR__
+#define __VPAIR_BUILTIN__ 1
+
+#elif __MMA__
+#define __VPAIR_ASM__ 1
+
+#else
+#define __VPAIR_NOP10__ 1
+#endif
+#endif
+
+/* Do we have MMA support and the vector pair built-in function?  If so, all
+   three public type names map to __vector_pair.  The built-in variant is
+   used only when neither of the other two selectors is also set.  */
+#if __VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__
+#define vector_pair_t __vector_pair
+#define vector_pair_f64_t __vector_pair
+#define vector_pair_f32_t __vector_pair
+
+/* vector pair double operations on power10. 
*/ +#define vpair_f64_splat(R, A) (*R) = __builtin_vpair_f64_splat (A) + +#define vpair_f64_abs(R,A) (*R) = __builtin_vpair_f64_abs (*A) +#define vpair_f64_nabs(R,A) (*R) = __builtin_vpair_f64_nabs (*A) +#define vpair_f64_neg(R,A) (*R) = __builtin_vpair_f64_neg (*A) +#define vpair_f64_sqrt(R,A) (*R) = __builtin_vpair_f64_sqrt (*A) + +#define vpair_f64_add(R,A,B) (*R) = __builtin_vpair_f64_add (*A, *B) +#define vpair_f64_div(R,A,B) (*R) = __builtin_vpair_f64_div (*A, *B) +#define vpair_f64_max(R,A,B) (*R) = __builtin_vpair_f64_max (*A, *B) +#define vpair_f64_min(R,A,B) (*R) = __builtin_vpair_f64_min (*A, *B) +#define vpair_f64_mul(R,A,B) (*R) = __builtin_vpair_f64_mul (*A, *B) +#define vpair_f64_sub(R,A,B) (*R) = __builtin_vpair_f64_sub (*A, *B) + +#define vpair_f64_fma(R,A,B,C) (*R) = __builtin_vpair_f64_fma (*A, *B, *C) +#define vpair_f64_fms(R,A,B,C) (*R) = __builtin_vpair_f64_fms (*A, *B, *C) +#define vpair_f64_nfma(R,A,B,C) (*R) = __builtin_vpair_f64_nfma (*A, *B, *C) +#define vpair_f64_nfms(R,A,B,C) (*R) = __builtin_vpair_f64_nfms (*A, *B, *C) + +/* vector pair float operations on power10. 
*/ +#define vpair_f32_splat(R, A) (*R) = __builtin_vpair_f32_splat (A) + +#define vpair_f32_abs(R,A) (*R) = __builtin_vpair_f32_abs (*A) +#define vpair_f32_nabs(R,A) (*R) = __builtin_vpair_f32_nabs (*A) +#define vpair_f32_neg(R,A) (*R) = __builtin_vpair_f32_neg (*A) +#define vpair_f32_sqrt(R,A) (*R) = __builtin_vpair_f32_sqrt (*A) + +#define vpair_f32_add(R,A,B) (*R) = __builtin_vpair_f32_add (*A, *B) +#define vpair_f32_div(R,A,B) (*R) = __builtin_vpair_f32_div (*A, *B) +#define vpair_f32_max(R,A,B) (*R) = __builtin_vpair_f32_max (*A, *B) +#define vpair_f32_min(R,A,B) (*R) = __builtin_vpair_f32_min (*A, *B) +#define vpair_f32_mul(R,A,B) (*R) = __builtin_vpair_f32_mul (*A, *B) +#define vpair_f32_sub(R,A,B) (*R) = __builtin_vpair_f32_sub (*A, *B) + +#define vpair_f32_fma(R,A,B,C) (*R) = __builtin_vpair_f32_fma (*A, *B, *C) +#define vpair_f32_fms(R,A,B,C) (*R) = __builtin_vpair_f32_fms (*A, *B, *C) +#define vpair_f32_nfma(R,A,B,C) (*R) = __builtin_vpair_f32_nfma (*A, *B, *C) +#define vpair_f32_nfms(R,A,B,C) (*R) = __builtin_vpair_f32_nfma (*A, *B, *C) + + +/* Do we have the __vector_pair type available, but we don't have the built-in + functions? */ + +#elif __VPAIR_ASM__ +#define vector_pair_t __vector_pair +#define vector_pair_f64_t __vector_pair +#define vector_pair_f32_t __vector_pair + +#undef __VPAIR_FP_UNARY_ASM +#define __VPAIR_FP_UNARY_ASM(OPCODE, R, A) \ + __asm__ (OPCODE " %x0,%x1\n\t" OPCODE " %x0+1,%x1+1" \ + : "=wa" (*(__vector_pair *)(R)) \ + : "wa" (*(__vector_pair *)(A))); + +#undef __VPAIR_FP_BINARY_ASM +#define __VPAIR_FP_BINARY_ASM(OPCODE, R, A, B) \ + __asm__ (OPCODE " %x0,%x1,%x2\n\t" OPCODE " %x0+1,%x1+1,%x2+1" \ + : "=wa" (*(__vector_pair *)(R)) \ + : "wa" (*(__vector_pair *)(A)), \ + "wa" (*(__vector_pair *)(B))); + + /* Note the 'a' version of the FMA instruction must be used. 
*/ +#undef __VPAIR_FP_FMA_ASM +#define __VPAIR_FP_FMA_ASM(OPCODE, R, A, B, C) \ + __asm__ (OPCODE " %x0,%x1,%x2\n\t" OPCODE " %x0+1,%x1+1,%x2+1" \ + : "=wa" (*(__vector_pair *)(R)) \ + : "wa" (*(__vector_pair *)(A)), \ + "wa" (*(__vector_pair *)(B)), \ + "0" (*(__vector_pair *)(C))); + +#define vpair_f64_splat(R, A) \ + __asm__ ("xxlor %x0+1,%x1,%x1" \ + : "=wa" (*(__vector_pair *)(R)) \ + : "0" (__builtin_vec_splats ((double) (A)))) + +#define vpair_f64_abs(R,A) __VPAIR_FP_UNARY_ASM ("xvabsdp", R, A) +#define vpair_f64_nabs(R,A) __VPAIR_FP_UNARY_ASM ("xvnabsdp", R, A) +#define vpair_f64_neg(R,A) __VPAIR_FP_UNARY_ASM ("xvnegdp", R, A) +#define vpair_f64_sqrt(R,A) __VPAIR_FP_UNARY_ASM ("xvsqrtdp", R, A) + +#define vpair_f64_add(R,A,B) __VPAIR_FP_BINARY_ASM ("xvadddp", R, A, B) +#define vpair_f64_div(R,A,B) __VPAIR_FP_BINARY_ASM ("xvdivdp", R, A, B) +#define vpair_f64_max(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmaxdp", R, A, B) +#define vpair_f64_min(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmindp", R, A, B) +#define vpair_f64_mul(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmuldp", R, A, B) +#define vpair_f64_sub(R,A,B) __VPAIR_FP_BINARY_ASM ("xvsubdp", R, A, B) + +#define vpair_f64_fma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmaddadp", R, A, B, C) +#define vpair_f64_fms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmsubadp", R, A, B, C) +#define vpair_f64_nfma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmaddadp", R, A, B, C) +#define vpair_f64_nfms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmsubadp", R, A, B, C) + +#define vpair_f32_splat(R, A) \ + __asm__ ("xxlor %x0+1,%x1,%x1" \ + : "=wa" (*(__vector_pair *)(R)) \ + : "0" (__builtin_vec_splats ((float) (A)))) + +#define vpair_f32_abs(R,A) __VPAIR_FP_UNARY_ASM ("xvabssp", R, A) +#define vpair_f32_nabs(R,A) __VPAIR_FP_UNARY_ASM ("xvnabssp", R, A) +#define vpair_f32_neg(R,A) __VPAIR_FP_UNARY_ASM ("xvnegsp", R, A) +#define vpair_f32_sqrt(R,A) __VPAIR_FP_UNARY_ASM ("xvsqrtsp", R, A) + +#define vpair_f32_add(R,A,B) __VPAIR_FP_BINARY_ASM ("xvaddsp", R, A, B) +#define vpair_f32_div(R,A,B) 
__VPAIR_FP_BINARY_ASM ("xvdivsp", R, A, B) +#define vpair_f32_max(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmaxsp", R, A, B) +#define vpair_f32_min(R,A,B) __VPAIR_FP_BINARY_ASM ("xvminsp", R, A, B) +#define vpair_f32_mul(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmulsp", R, A, B) +#define vpair_f32_sub(R,A,B) __VPAIR_FP_BINARY_ASM ("xvsubsp", R, A, B) + +#define vpair_f32_fma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmaddasp", R, A, B, C) +#define vpair_f32_fms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmsubasp", R, A, B, C) +#define vpair_f32_nfma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmaddasp", R, A, B, C) +#define vpair_f32_nfms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmsubasp", R, A, B, C) + + +#else /* !__VPAIR_BUILTIN__ && !__VPAIR_ASM__. */ + +#ifndef __VECTOR_PAIR_UNION__ +#define __VECTOR_PAIR_UNION__ 1 + +union vpair_union { + /* Double vector pairs. */ + vector double __vdbl[2]; + + /* Float vector pairs. */ + vector float __vflt[2]; + +}; +#endif /* __VECTOR_PAIR_UNION__. */ + +#define vector_pair_t union vpair_union +#define vector_pair_f64_t union vpair_union +#define vector_pair_f32_t union vpair_union + +/* vector pair double operations on power8/power9. 
*/ +#define vpair_f64_splat(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + __vr->__vdbl[0] = __vr->__vdbl[1] \ + = __builtin_vec_splats ((double)(A)); \ + } \ + while (0) + +#define vpair_f64_abs(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vdbl[0] = __builtin_vsx_xvabsdp (__va->__vdbl[0]); \ + __vr->__vdbl[1] = __builtin_vsx_xvabsdp (__va->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_nabs(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vdbl[0] = __builtin_vsx_xvnabsdp (__va->__vdbl[0]); \ + __vr->__vdbl[1] = __builtin_vsx_xvnabsdp (__va->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_neg(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vdbl[0] = - __va->__vdbl[0]; \ + __vr->__vdbl[1] = - __va->__vdbl[1]; \ + } \ + while (0) + +#define vpair_f64_sqrt(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vdbl[0] = __builtin_vsx_xvsqrtdp (__va->__vdbl[0]); \ + __vr->__vdbl[1] = __builtin_vsx_xvsqrtdp (__va->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_add(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] = __va->__vdbl[0] + __vb->__vdbl[0]; \ + __vr->__vdbl[1] = __va->__vdbl[1] + __vb->__vdbl[1]; \ + } \ + while (0) + +#define vpair_f64_div(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] = __va->__vdbl[0] / __vb->__vdbl[0]; \ + 
__vr->__vdbl[1] = __va->__vdbl[1] / __vb->__vdbl[1]; \ + } \ + while (0) + +#define vpair_f64_max(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvmaxdp (__va->__vdbl[0], __vb->__vdbl[0]); \ + __vr->__vdbl[1] \ + = __builtin_vsx_xvmaxdp (__va->__vdbl[1], __vb->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_min(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvmindp (__va->__vdbl[0], __vb->__vdbl[0]); \ + __vr->__vdbl[1] \ + = __builtin_vsx_xvmindp (__va->__vdbl[1], __vb->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_mul(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] = __va->__vdbl[0] * __vb->__vdbl[0]; \ + __vr->__vdbl[1] = __va->__vdbl[1] * __vb->__vdbl[1]; \ + } \ + while (0) + +#define vpair_f64_sub(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vdbl[0] = __va->__vdbl[0] - __vb->__vdbl[0]; \ + __vr->__vdbl[1] = __va->__vdbl[1] - __vb->__vdbl[1]; \ + } \ + while (0) + +#define vpair_f64_fma(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvmadddp (__va->__vdbl[0], \ + __vb->__vdbl[0], \ + __vc->__vdbl[0]); \ + __vr->__vdbl[1] \ + = 
__builtin_vsx_xvmadddp (__va->__vdbl[1], \ + __vb->__vdbl[1], \ + __vc->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_fms(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvmsubdp (__va->__vdbl[0], \ + __vb->__vdbl[0], \ + __vc->__vdbl[0]); \ + __vr->__vdbl[1] \ + = __builtin_vsx_xvmsubdp (__va->__vdbl[1], \ + __vb->__vdbl[1], \ + __vc->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_nfma(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvnmadddp (__va->__vdbl[0], \ + __vb->__vdbl[0], \ + __vc->__vdbl[0]); \ + __vr->__vdbl[1] \ + = __builtin_vsx_xvnmadddp (__va->__vdbl[1], \ + __vb->__vdbl[1], \ + __vc->__vdbl[1]); \ + } \ + while (0) + +#define vpair_f64_nfms(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vdbl[0] \ + = __builtin_vsx_xvnmsubdp (__va->__vdbl[0], \ + __vb->__vdbl[0], \ + __vc->__vdbl[0]); \ + __vr->__vdbl[1] \ + = __builtin_vsx_xvnmsubdp (__va->__vdbl[1], \ + __vb->__vdbl[1], \ + __vc->__vdbl[1]); \ + } \ + while (0) + + +/* vector pair float operations on power8/power9. */ + +/* vector pair float operations on power8/power9. 
*/ +#define vpair_f32_splat(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + __vr->__vflt[0] = __vr->__vflt[1] \ + = __builtin_vec_splats ((float)(A)); \ + } \ + while (0) + +#define vpair_f32_abs(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vflt[0] = __builtin_vsx_xvabssp (__va->__vflt[0]); \ + __vr->__vflt[1] = __builtin_vsx_xvabssp (__va->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_nabs(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vflt[0] = __builtin_vsx_xvnabssp (__va->__vflt[0]); \ + __vr->__vflt[1] = __builtin_vsx_xvnabssp (__va->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_neg(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vflt[0] = - __va->__vflt[0]; \ + __vr->__vflt[1] = - __va->__vflt[1]; \ + } \ + while (0) + +#define vpair_f32_sqrt(R, A) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + __vr->__vflt[0] = __builtin_vsx_xvsqrtsp (__va->__vflt[0]); \ + __vr->__vflt[1] = __builtin_vsx_xvsqrtsp (__va->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_add(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] = __va->__vflt[0] + __vb->__vflt[0]; \ + __vr->__vflt[1] = __va->__vflt[1] + __vb->__vflt[1]; \ + } \ + while (0) + +#define vpair_f32_div(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] = __va->__vflt[0] / __vb->__vflt[0]; \ + 
__vr->__vflt[1] = __va->__vflt[1] / __vb->__vflt[1]; \ + } \ + while (0) + +#define vpair_f32_max(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvmaxsp (__va->__vflt[0], __vb->__vflt[0]); \ + __vr->__vflt[1] \ + = __builtin_vsx_xvmaxsp (__va->__vflt[1], __vb->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_min(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvminsp (__va->__vflt[0], __vb->__vflt[0]); \ + __vr->__vflt[1] \ + = __builtin_vsx_xvminsp (__va->__vflt[1], __vb->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_mul(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] = __va->__vflt[0] * __vb->__vflt[0]; \ + __vr->__vflt[1] = __va->__vflt[1] * __vb->__vflt[1]; \ + } \ + while (0) + +#define vpair_f32_sub(R, A, B) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + __vr->__vflt[0] = __va->__vflt[0] - __vb->__vflt[0]; \ + __vr->__vflt[1] = __va->__vflt[1] - __vb->__vflt[1]; \ + } \ + while (0) + +#define vpair_f32_fma(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvmaddsp (__va->__vflt[0], \ + __vb->__vflt[0], \ + __vc->__vflt[0]); \ + __vr->__vflt[1] \ + = 
__builtin_vsx_xvmaddsp (__va->__vflt[1], \ + __vb->__vflt[1], \ + __vc->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_fms(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvmsubsp (__va->__vflt[0], \ + __vb->__vflt[0], \ + __vc->__vflt[0]); \ + __vr->__vflt[1] \ + = __builtin_vsx_xvmsubsp (__va->__vflt[1], \ + __vb->__vflt[1], \ + __vc->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_nfma(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvnmaddsp (__va->__vflt[0], \ + __vb->__vflt[0], \ + __vc->__vflt[0]); \ + __vr->__vflt[1] \ + = __builtin_vsx_xvnmaddsp (__va->__vflt[1], \ + __vb->__vflt[1], \ + __vc->__vflt[1]); \ + } \ + while (0) + +#define vpair_f32_nfms(R, A, B, C) \ + do \ + { \ + union vpair_union *__vr = (union vpair_union *)(R); \ + union vpair_union *__va = (union vpair_union *)(A); \ + union vpair_union *__vb = (union vpair_union *)(B); \ + union vpair_union *__vc = (union vpair_union *)(C); \ + __vr->__vflt[0] \ + = __builtin_vsx_xvnmsubsp (__va->__vflt[0], \ + __vb->__vflt[0], \ + __vc->__vflt[0]); \ + __vr->__vflt[1] \ + = __builtin_vsx_xvnmsubsp (__va->__vflt[1], \ + __vb->__vflt[1], \ + __vc->__vflt[1]); \ + } \ + while (0) + +#endif /* !__VPAIR_BUILTIN__ && !__VPAIR_ASM__. */ + +#endif /* _VECTOR_PAIR_H. */