[gcc(refs/users/meissner/heads/work177-vpair)] Add vector pair init and splat.

Michael Meissner via Gcc-cvs Wed, 04 Sep 2024 20:25:51 -0700

https://gcc.gnu.org/g:7f65d3e030ef489b057c8a8409a3aefc7cd62ede


commit 7f65d3e030ef489b057c8a8409a3aefc7cd62ede
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Wed Sep 4 23:12:07 2024 -0400

    Add vector pair init and splat.
    
    2024-09-04  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
            built-in function.
            (__builtin_vpair_f32_splat): Likewise.
            (__builtin_vpair_f64_splat): Likewise.
            * config/rs6000/vector-pair.h: Update power10 splat patterns.
            * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
            (UNSPEC_VPAIR_SPLAT): Likewise.
            (VPAIR_SPLAT_VMODE): New mode iterator.
            (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
            (vpair_splat_name): Likewise.
            (vpair_zero): New insn.
            (vpair_splat_<vpair_splat_name>): New define_expand.
            (vpair_splat_<vpair_splat_name>_internal): New insns.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vector-pair-5.c: New test.
            * gcc.target/powerpc/vector-pair-6.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def            |  10 +++
 gcc/config/rs6000/vector-pair.h                  |  13 +--
 gcc/config/rs6000/vector-pair.md                 | 102 ++++++++++++++++++++++-
 gcc/doc/extend.texi                              |   9 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-5.c |  54 ++++++++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c |  56 +++++++++++++
 6 files changed, 232 insertions(+), 12 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 2bac0e58971..e0b1c744f7c 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3934,6 +3934,10 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
     STXVP nothing {mma,pair}
 
+;; Vector pair built-in functions.
+  v256 __builtin_vpair_zero ();
+    VPAIR_ZERO vpair_zero {mma}
+
 ;; Vector pair built-in functions with float elements
   v256 __builtin_vpair_f32_abs (v256);
     VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -3974,6 +3978,9 @@
   v256 __builtin_vpair_f32_nfms (v256, v256, v256);
     VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
 
+  v256 __builtin_vpair_f32_splat (float);
+    VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
     VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -4017,5 +4024,8 @@
   v256 __builtin_vpair_f64_nfms (v256, v256, v256);
     VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
 
+  v256 __builtin_vpair_f64_splat (double);
+    VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
     VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index e399e89e8e4..3c03e44f3f4 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -38,11 +38,7 @@ typedef __vector_pair vector_pair_t;
 #define VPAIR_FP_SECOND                "S"             /* Access 2nd VSX register.  */
 
 /* vector pair double operations on power10.  */
-#define vpair_f64_splat(R, A)                                          \
-  __asm__ ("xxpermdi %x0,%x1,%x1,0" "\n\t"                             \
-           "xxpermdi %" VPAIR_FP_SECOND "0,%x1,%x1,0"                  \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : "wa" ((A)))
+#define vpair_f64_splat(R, A)  (*R) = __builtin_vpair_f64_splat (A)
 
 #define vpair_f64_neg(R,A)     (*R) = __builtin_vpair_f64_neg (*A)
 #define vpair_f64_abs(R,A)     (*R) = __builtin_vpair_f64_abs (*A)
@@ -63,12 +59,7 @@ typedef __vector_pair vector_pair_t;
 
 
 /* vector pair float operations on power10.  */
-#define vpair_f32_splat(R, A)                                          \
-  __asm__ ("xscvdpspn %x0,%x1" "\n\t"                                  \
-           "xxspltw %x0,%x0,0" "\n\t"                                  \
-           "xxlor %" VPAIR_FP_SECOND "0,%x0,%x0"                       \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : "wa" (((float) (A))))
+#define vpair_f32_splat(R, A)  (*R) = __builtin_vpair_f32_splat (A)
 
 #define vpair_f32_neg(R,A)     (*R) = __builtin_vpair_f32_neg (*A)
 #define vpair_f32_abs(R,A)     (*R) = __builtin_vpair_f32_abs (*A)
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index fe8004b75d5..6fbc90cf528 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -39,7 +39,9 @@
    UNSPEC_VPAIR_PLUS
    UNSPEC_VPAIR_SMAX
    UNSPEC_VPAIR_SMIN
-   UNSPEC_VPAIR_SQRT])
+   UNSPEC_VPAIR_SPLAT
+   UNSPEC_VPAIR_SQRT
+   UNSPEC_VPAIR_ZERO])
 
 ;; Vector pair element ID that defines the scaler element within the vector pair.
 (define_c_enum "vpair_element"
@@ -102,6 +104,104 @@
 ;; Map the scalar element ID into the appropriate insn type for divide.
 (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT  "vecfdiv")
                                (VPAIR_ELEMENT_DOUBLE "vecdiv")])
+
+;; Mode iterator for the vector modes that we provide splat operations for.
+(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF])
+
+;; Map element mode to 128-bit vector mode for splat operations
+(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF")
+                                               (DF "V2DF")])
+
+;; Map either element mode or vector mode into the name for the splat insn.
+(define_mode_attr vpair_splat_name [(SF   "v8sf")
+                                   (DF   "v4df")
+                                   (V4SF "v8sf")
+                                   (V2DF "v4df")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+       (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (match_dup 3))
+   (set (match_dup 2) (match_dup 3))]
+{
+  rtx op0 = operands[0];
+
+  operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0);
+  operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16);
+  operands[3] = CONST0_RTX (V2DFmode);
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecperm")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_<vpair_splat_name>"
+  [(use (match_operand:OO 0 "vsx_register_operand"))
+   (use (match_operand:SFDF 1 "input_operand"))]
+  "TARGET_MMA"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  machine_mode element_mode = <MODE>mode;
+
+  if (op1 == CONST0_RTX (element_mode))
+    {
+      emit_insn (gen_vpair_zero (op0));
+      DONE;
+    }
+
+  machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode;
+  rtx vec = gen_reg_rtx (vector_mode);
+  unsigned num_elements = GET_MODE_NUNITS (vector_mode);
+  rtvec elements = rtvec_alloc (num_elements);
+  for (size_t i = 0; i < num_elements; i++)
+    RTVEC_ELT (elements, i) = copy_rtx (op1);
+
+  rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements));
+  emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec));
+  DONE;
+})
+
+;; Inner splat support.  Operand 1 is the vector splat created above.  Allow
+;; operand 1 to overlap with the output registers to eliminate one move
+;; instruction.
+(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+       (unspec:OO
+        [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")]
+        UNSPEC_VPAIR_SPLAT))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0);
+  rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16);
+  rtx op1 = operands[1];
+  unsigned op1_regno = reg_or_subregno (op1);
+
+  /* Check if the input is one of the output registers.  */
+  if (op1_regno == reg_or_subregno (op0_a))
+    emit_move_insn (op0_b, op1);
+
+  else if (op1_regno == reg_or_subregno (op0_b))
+    emit_move_insn (op0_a, op1);
+
+  else
+    {
+      emit_move_insn (op0_a, op1);
+      emit_move_insn (op0_b, op1);
+    }
+
+  DONE;
+}
+  [(set_attr "length" "*,8")
+   (set_attr "type" "vecmove")])
 
 ;; Vector pair unary operations.  The last argument in the UNSPEC is a
 ;; CONST_INT which identifies what the scalar element is.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index a54f1194378..0163a420c7f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -24273,6 +24273,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the
 The @code{nfms} built-in is a combination of @code{neg} of the
 @code{fms} built-in.
 
+The following built-in function is independent of the type of the
+underlying vector:
+
+@smallexample
+__vector_pair __builtin_vpair_zero ();
+@end smallexample
+
 The following built-in functions operate on pairs of
 @code{vector float} values:
 
@@ -24293,6 +24300,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair,
                                        __vector_pair);
 __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair,
                                        __vector_pair);
+__vector_pair __builtin_vpair_f32_splat (float);
 __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair);
 @end smallexample
 
@@ -24316,6 +24324,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair,
                                        __vector_pair);
 __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair,
                                        __vector_pair);
+__vector_pair __builtin_vpair_f64_splat (double);
 __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair);
 @end smallexample
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
new file mode 100644
index 00000000000..f90ad5c2e1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   the vector pair zero and splat functions for vector pairs containing
+   doubles.  */
+
+void
+test_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_f64_splat (0.0);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+  /* xxspltidp, xxlor.  */
+  *p = __builtin_vpair_f64_splat (1.0);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+  /* plxv, xxlor (64-bit) or lfd, xxpermdi, xxlor (32-bit).  */
+  *p = __builtin_vpair_f64_splat (3.1415926535);
+}
+
+void
+test_splat_arg (__vector_pair *p, double x)
+{
+  /* xxpermdi, xxlor.  */
+  *p = __builtin_vpair_f64_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, double *q)
+{
+  /* lxvdsx, xxlor.  */
+  *p = __builtin_vpair_f64_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvdsx\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}               6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}           1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
new file mode 100644
index 00000000000..5ec53d4bfc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+   the vector pair zero and splat functions for vector pairs containing
+   floats.  */
+
+void
+test_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_f32_splat (0.0f);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+  /* xxspltiw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (1.0f);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+  /* xxspltiw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (3.1415926535f);
+}
+
+void
+test_splat_arg (__vector_pair *p, float x)
+{
+  /* xscvdpspn, xxspltw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, float *q)
+{
+  /* lxvwsx, xxlor.  */
+  *p = __builtin_vpair_f32_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvwsx\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}               6 } } */
+/* { dg-final { scan-assembler-times {\mxscvdpspn\M}           1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}            2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltw\M}             1 } } */

[gcc(refs/users/meissner/heads/work177-vpair)] Add vector pair init and splat.

Reply via email to