https://gcc.gnu.org/g:7f65d3e030ef489b057c8a8409a3aefc7cd62ede
commit 7f65d3e030ef489b057c8a8409a3aefc7cd62ede Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed Sep 4 23:12:07 2024 -0400 Add vector pair init and splat. 2024-09-04 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.h: Update power10 splat patterns. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_<vpair_splat_name>): New define_expand. (vpair_splat_<vpair_splat_name>_internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 +++ gcc/config/rs6000/vector-pair.h | 13 +-- gcc/config/rs6000/vector-pair.md | 102 ++++++++++++++++++++++- gcc/doc/extend.texi | 9 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 54 ++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 +++++++++++++ 6 files changed, 232 insertions(+), 12 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 2bac0e58971..e0b1c744f7c 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,6 +3934,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. 
+ v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3974,6 +3978,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); + VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4017,5 +4024,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); + VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index e399e89e8e4..3c03e44f3f4 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -38,11 +38,7 @@ typedef __vector_pair vector_pair_t; #define VPAIR_FP_SECOND "S" /* Access 2nd VSX register. */ /* vector pair double operations on power10. */ -#define vpair_f64_splat(R, A) \ - __asm__ ("xxpermdi %x0,%x1,%x1,0" "\n\t" \ - "xxpermdi %" VPAIR_FP_SECOND "0,%x1,%x1,0" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : "wa" ((A))) +#define vpair_f64_splat(R, A) (*R) = __builtin_vpair_f64_splat (A) #define vpair_f64_neg(R,A) (*R) = __builtin_vpair_f64_neg (*A) #define vpair_f64_abs(R,A) (*R) = __builtin_vpair_f64_abs (*A) @@ -63,12 +59,7 @@ typedef __vector_pair vector_pair_t; /* vector pair float operations on power10. 
*/ -#define vpair_f32_splat(R, A) \ - __asm__ ("xscvdpspn %x0,%x1" "\n\t" \ - "xxspltw %x0,%x0,0" "\n\t" \ - "xxlor %" VPAIR_FP_SECOND "0,%x0,%x0" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : "wa" (((float) (A)))) +#define vpair_f32_splat(R, A) (*R) = __builtin_vpair_f32_splat (A) #define vpair_f32_neg(R,A) (*R) = __builtin_vpair_f32_neg (*A) #define vpair_f32_abs(R,A) (*R) = __builtin_vpair_f32_abs (*A) diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index fe8004b75d5..6fbc90cf528 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -39,7 +39,9 @@ UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX UNSPEC_VPAIR_SMIN - UNSPEC_VPAIR_SQRT]) + UNSPEC_VPAIR_SPLAT + UNSPEC_VPAIR_SQRT + UNSPEC_VPAIR_ZERO]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -102,6 +104,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. 
+(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_<vpair_splat_name>" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = <MODE>mode; + + if (op1 == CONST0_RTX (element_mode)) + { + emit_insn (gen_vpair_zero (op0)); + DONE; + } + + machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode; + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_t i = 0; i < num_elements; i++) + RTVEC_ELT (elements, i) = copy_rtx (op1); + + rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); + emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec)); + DONE; +}) + +;; Inner splat support. Operand1 is the vector splat created above. Allow +;; operand 1 to overlap with the output registers to eliminate one move +;; instruction. 
+(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")] + UNSPEC_VPAIR_SPLAT))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0); + rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16); + rtx op1 = operands[1]; + unsigned op1_regno = reg_or_subregno (op1); + + /* Check if the input is one of the output registers. */ + if (op1_regno == reg_or_subregno (op0_a)) + emit_move_insn (op0_b, op1); + + else if (op1_regno == reg_or_subregno (op0_b)) + emit_move_insn (op0_a, op1); + + else + { + emit_move_insn (op0_a, op1); + emit_move_insn (op0_b, op1); + } + + DONE; +} + [(set_attr "length" "*,8") + (set_attr "type" "vecmove")]) ;; Vector pair unary operations. The last argument in the UNSPEC is a ;; CONST_INT which identifies what the scalar element is. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index a54f1194378..0163a420c7f 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -24273,6 +24273,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the The @code{nfms} built-in is a combination of @code{neg} of the @code{fms} built-in. 
+The following built-in function is independent of the type of the +underlying vector: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + The following built-in functions operate on pairs of @code{vector float} values: @@ -24293,6 +24300,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_splat (float); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -24316,6 +24324,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_splat (double); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c new file mode 100644 index 00000000000..f90ad5c2e1d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + doubles. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f64_splat (0.0); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltidp, xxlor. */ + *p = __builtin_vpair_f64_splat (1.0); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* plxv, xxlor (64-bit) or lfd, xxpermdi, xxlor (32-bit). 
*/ + *p = __builtin_vpair_f64_splat (3.1415926535); +} + +void +test_splat_arg (__vector_pair *p, double x) +{ + /* xxpermdi, xxlor. */ + *p = __builtin_vpair_f64_splat (x); +} + +void +test_splat_mem (__vector_pair *p, double *q) +{ + /* lxvdsx, xxlor. */ + *p = __builtin_vpair_f64_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c new file mode 100644 index 00000000000..5ec53d4bfc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + floats. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f32_splat (0.0f); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (1.0f); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (3.1415926535f); +} + +void +test_splat_arg (__vector_pair *p, float x) +{ + /* xscvdpspn, xxspltw, xxlor. */ + *p = __builtin_vpair_f32_splat (x); +} + +void +test_splat_mem (__vector_pair *p, float *q) +{ + /* lxvwsx, xxlor. 
*/ + *p = __builtin_vpair_f32_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */