Re: [PATCH 5/6 ver 2] rs6000, Add vector splat builtin support

will schmidt via Gcc-patches Tue, 16 Jun 2020 13:22:22 -0700

On Mon, 2020-06-15 at 16:38 -0700, Carl Love via Gcc-patches wrote:
> v2 changes:  
> 
>     change log fixes
> 
>     gcc/config/rs6000/altivec changed name of define_insn and define_expand
>     for vxxspltiw... to xxspltiw...   Fixed spaces in
>     gen_xxsplti32dx_v4sf_inst (operands[0], GEN_INT
> 
>     gcc/rs6000-builtin.def propagated name changes above where they are used.
> 
>     Updated definition for S32bit_cint_operand, c32bit_cint_operand,
>     f32bit_const_operand predicate definitions.
> 
>     Changed name of rs6000_constF32toI32 to rs6000_const_f32_to_i32,
>     propagated name change as needed.  Replaced if test with gcc_assert().
> 
>     Fixed description of vec_splatid() in documentation.


Probably easier to read if you place the 'changes from previous patch' 
below the patch introduction.


> -----------------------
> 
> GCC maintainers:
> 
> The following patch adds support for the vec_splati, vec_splatid and
> vec_splati_ins builtins.
> 
> Note, this patch adds support for instructions that take a 32-bit immediate
> value that represents a floating point value.  This support adds new
> predicates and a support function to properly handle the immediate value.

s/Note, //  (i.e. drop the leading "Note,").

> 
> The patch has been compiled and tested on
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
> 
> with no regression errors.
> 
> The test case was compiled on a Power 9 system and then tested on
> Mambo.
> 
> Please let me know if this patch is acceptable for the pu
> branch.  Thanks.

Which branch?



> 
>                          Carl Love
> --------------------------------------------------------
> gcc/ChangeLog
> 
> 2020-06-15  Carl Love  <c...@us.ibm.com>
> 
>         * config/rs6000/altivec.h (vec_splati, vec_splatid, vec_splati_ins):
>       Add defines.
>         * config/rs6000/altivec.md (UNSPEC_XXSPLTIW, UNSPEC_XXSPLTID,
>       UNSPEC_XXSPLTI32DX): New.
>         (vxxspltiw_v4si, vxxspltiw_v4sf_inst, vxxspltidp_v2df_inst,
>       vxxsplti32dx_v4si_inst, vxxsplti32dx_v4sf_inst): New define_insn.
>         (vxxspltiw_v4sf, vxxspltidp_v2df, vxxsplti32dx_v4si,
>       vxxsplti32dx_v4sf.): New define_expands.
>         * config/rs6000/predicates (u1bit_cint_operand,
>         s32bit_cint_operand, c32bit_cint_operand,
>       f32bit_const_operand): New predicates.
>         * config/rs6000/rs6000-builtin.def (VXXSPLTIW_V4SI, VXXSPLTIW_V4SF,
>        VXXSPLTID): NewBU_FUTURE_V_1 definitions.

"NewBU_FUTURE_V_1"?  Missing a space -- this should read "New BU_FUTURE_V_1".


>         (VXXSPLTI32DX_V4SI, VXXSPLTI32DX_V4SF): New BU_FUTURE_V_3
>       definitions.
>         (XXSPLTIW, XXSPLTID): New BU_FUTURE_OVERLOAD_1 definitions.

seems to be a typo

>         (XXSPLTI32DX): Add BU_FUTURE_OVERLOAD_3 definition.

>         * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_XXSPLTIW,
>       FUTURE_BUILTIN_VEC_XXSPLTID, FUTURE_BUILTIN_VEC_XXSPLTI32DX):
>       New definitions.
>         * config/rs6000/rs6000-protos.h (rs6000_constF32toI32): New extern
>       declaration.
>         * config/rs6000/rs6000.c (rs6000_constF32toI32): New function.
>         * config/doc/extend.texi: Add documentation for vec_splati,
>       vec_splatid, and vec_splati_ins.

Inconsistent tabs/spaces in the indentation of the ChangeLog entries above.



Thanks,
-Will


> gcc/testsuite/ChangeLog
> 
> 2020-06-15  Carl Love  <c...@us.ibm.com>
> 
>         * testsuite/gcc.target/powerpc/vec-splati-runnable: New test.
> ---
>  gcc/config/rs6000/altivec.h                   |   3 +
>  gcc/config/rs6000/altivec.md                  | 109 +++++++++++++
>  gcc/config/rs6000/predicates.md               |  33 ++++
>  gcc/config/rs6000/rs6000-builtin.def          |  13 ++
>  gcc/config/rs6000/rs6000-call.c               |  19 +++
>  gcc/config/rs6000/rs6000-protos.h             |   1 +
>  gcc/config/rs6000/rs6000.c                    |  11 ++
>  gcc/doc/extend.texi                           |  35 +++++
>  .../gcc.target/powerpc/vec-splati-runnable.c  | 145 ++++++++++++++++++
>  9 files changed, 369 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 0be68892aad..9ed41b1cbf1 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -705,6 +705,9 @@ __altivec_scalar_pred(vec_any_nle,
>  #define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c)
>  #define vec_sldb(a, b, c)      __builtin_vec_sldb (a, b, c)
>  #define vec_srdb(a, b, c)      __builtin_vec_srdb (a, b, c)
> +#define vec_splati(a)  __builtin_vec_xxspltiw (a)
> +#define vec_splatid(a) __builtin_vec_xxspltid (a)
> +#define vec_splati_ins(a, b, c)        __builtin_vec_xxsplti32dx (a, b, c)
> 
>  #define vec_gnb(a, b)        __builtin_vec_gnb (a, b)
>  #define vec_clrl(a, b)       __builtin_vec_clrl (a, b)
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 832a35cdaa9..25f6b9b2f07 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -173,6 +173,9 @@
>     UNSPEC_VSTRIL
>     UNSPEC_SLDB
>     UNSPEC_SRDB
> +   UNSPEC_XXSPLTIW
> +   UNSPEC_XXSPLTID
> +   UNSPEC_XXSPLTI32DX
>  ])
> 
>  (define_c_enum "unspecv"
> @@ -799,6 +802,112 @@
>    "vs<SLDB_LR>dbi %0,%1,%2,%3"
>    [(set_attr "type" "vecsimple")])
> 
> +(define_insn "xxspltiw_v4si"
> +  [(set (match_operand:V4SI 0 "register_operand" "=wa")
> +     (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTIW))]
> + "TARGET_FUTURE"
> + "xxspltiw %x0,%1"
> + [(set_attr "type" "vecsimple")])
> +
> +(define_expand "xxspltiw_v4sf"
> +  [(set (match_operand:V4SF 0 "register_operand" "=wa")
> +     (unspec:V4SF [(match_operand:SF 1 "f32bit_const_operand" "n")]
> +                  UNSPEC_XXSPLTIW))]
> + "TARGET_FUTURE"
> +{
> +  long long value = rs6000_const_f32_to_i32 (operands[1]);
> +  emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
> +  DONE;
> +})
> +
> +(define_insn "xxspltiw_v4sf_inst"
> +  [(set (match_operand:V4SF 0 "register_operand" "=wa")
> +     (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTIW))]
> + "TARGET_FUTURE"
> + "xxspltiw %x0,%c1"
> + [(set_attr "type" "vecsimple")])
> +
> +(define_expand "xxspltidp_v2df"
> +  [(set (match_operand:V2DF 0 "register_operand" )
> +     (unspec:V2DF [(match_operand:SF 1 "f32bit_const_operand")]
> +                  UNSPEC_XXSPLTID))]
> + "TARGET_FUTURE"
> +{
> +  long value = rs6000_const_f32_to_i32 (operands[1]);
> +  emit_insn (gen_xxspltidp_v2df_inst (operands[0], GEN_INT (value)));
> +  DONE;
> +})
> +
> +(define_insn "xxspltidp_v2df_inst"
> +  [(set (match_operand:V2DF 0 "register_operand" "=wa")
> +     (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTID))]
> +  "TARGET_FUTURE"
> +  "xxspltidp %x0,%c1"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "xxsplti32dx_v4si"
> +  [(set (match_operand:V4SI 0 "register_operand" "=wa")
> +     (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "wa")
> +                   (match_operand:QI 2 "u1bit_cint_operand" "n")
> +                   (match_operand:SI 3 "s32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTI32DX))]
> + "TARGET_FUTURE"
> +{
> +  int index = INTVAL (operands[2]);
> +
> +  if (!BYTES_BIG_ENDIAN)
> +    index = 1 - index;
> +
> +   /* Instruction uses destination as a source.  Do not overwrite source.  */
> +   emit_move_insn (operands[0], operands[1]);
> +
> +   emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], GEN_INT (index),
> +                                       operands[3]));
> +   DONE;
> +}
> + [(set_attr "type" "vecsimple")])
> +
> +(define_insn "xxsplti32dx_v4si_inst"
> +  [(set (match_operand:V4SI 0 "register_operand" "+wa")
> +     (unspec:V4SI [(match_operand:QI 1 "u1bit_cint_operand" "n")
> +                   (match_operand:SI 2 "s32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTI32DX))]
> +  "TARGET_FUTURE"
> +  "xxsplti32dx %x0,%1,%2"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "xxsplti32dx_v4sf"
> +  [(set (match_operand:V4SF 0 "register_operand" "=wa")
> +     (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "wa")
> +                   (match_operand:QI 2 "u1bit_cint_operand" "n")
> +                   (match_operand:SF 3 "f32bit_const_operand" "n")]
> +                  UNSPEC_XXSPLTI32DX))]
> +  "TARGET_FUTURE"
> +{
> +  int index = INTVAL (operands[2]);
> +  long value = rs6000_const_f32_to_i32 (operands[3]);
> +  if (!BYTES_BIG_ENDIAN)
> +    index = 1 - index;
> +
> +  /* Instruction uses destination as a source.  Do not overwrite source.  */
> +   emit_move_insn (operands[0], operands[1]);
> +   emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], GEN_INT (index),
> +                                      GEN_INT (value)));
> +   DONE;
> +})
> +
> +(define_insn "xxsplti32dx_v4sf_inst"
> +  [(set (match_operand:V4SF 0 "register_operand" "+wa")
> +     (unspec:V4SF [(match_operand:QI 1 "u1bit_cint_operand" "n")
> +                   (match_operand:SI 2 "s32bit_cint_operand" "n")]
> +                  UNSPEC_XXSPLTI32DX))]
> +  "TARGET_FUTURE"
> +  "xxsplti32dx %x0,%1,%2"
> +   [(set_attr "type" "vecsimple")])
> +
>  (define_expand "vstrir_<mode>"
>    [(set (match_operand:VIshort 0 "altivec_register_operand")
>       (unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")]
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index c3f460face2..48f913c5718 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -214,6 +214,11 @@
>    (and (match_code "const_int")
>         (match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15")))
> 
> +;; Return 1 if op is a unsigned 1-bit constant integer.
> +(define_predicate "u1bit_cint_operand"
> +  (and (match_code "const_int")
> +       (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1")))
> +
>  ;; Return 1 if op is a unsigned 3-bit constant integer.
>  (define_predicate "u3bit_cint_operand"
>    (and (match_code "const_int")
> @@ -272,6 +277,34 @@
>         (match_test "(unsigned HOST_WIDE_INT)
>                   (INTVAL (op) + 0x8000) >= 0x10000")))
> 
> +;; Return 1 if op is a 32-bit constant signed integer
> +(define_predicate "s32bit_cint_operand"
> +  (and (match_code "const_int")
> +       (match_test "(unsigned HOST_WIDE_INT)
> +                    (0x80000000 + UINTVAL (op)) >> 32 == 0")))
> +
> +;; Return 1 if op is a constant 32-bit unsigned
> +(define_predicate "c32bit_cint_operand"
> +  (and (match_code "const_int")
> +       (match_test "((UINTVAL (op) >> 32) == 0)")))
> +
> +;; Return 1 if op is a constant 32-bit floating point value
> +(define_predicate "f32bit_const_operand"
> +  (match_code "const_double")
> +{
> +  if (GET_MODE (op) == SFmode)
> +    return 1;
> +
> +  else if ((GET_MODE (op) == DFmode) && ((UINTVAL (op) >> 32) == 0))
> +   {
> +    /* Value fits in 32-bits */
> +    return 1;
> +    }
> +  else
> +    /* Not the expected mode.  */
> +    return 0;
> +})
> +
>  ;; Return 1 if op is a positive constant integer that is an exact power of 2.
>  (define_predicate "exact_log2_cint_operand"
>    (and (match_code "const_int")
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index 2b198177ef0..c85326de7f2 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2666,6 +2666,15 @@ BU_FUTURE_V_3 (VSRDB_V16QI, "vsrdb_v16qi", CONST, 
> vsrdb_v16qi)
>  BU_FUTURE_V_3 (VSRDB_V8HI, "vsrdb_v8hi", CONST, vsrdb_v8hi)
>  BU_FUTURE_V_3 (VSRDB_V4SI, "vsrdb_v4si", CONST, vsrdb_v4si)
>  BU_FUTURE_V_3 (VSRDB_V2DI, "vsrdb_v2di", CONST, vsrdb_v2di)
> +
> +BU_FUTURE_V_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si)
> +BU_FUTURE_V_1 (VXXSPLTIW_V4SF, "vxxspltiw_v4sf", CONST, xxspltiw_v4sf)
> +
> +BU_FUTURE_V_1 (VXXSPLTID, "vxxspltidp", CONST, xxspltidp_v2df)
> +
> +BU_FUTURE_V_3 (VXXSPLTI32DX_V4SI, "vxxsplti32dx_v4si", CONST, 
> xxsplti32dx_v4si)
> +BU_FUTURE_V_3 (VXXSPLTI32DX_V4SF, "vxxsplti32dx_v4sf", CONST, 
> xxsplti32dx_v4sf)
> +
>  BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
>  BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
>  BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
> @@ -2697,6 +2706,10 @@ BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
> 
>  BU_FUTURE_OVERLOAD_1 (VSTRIR_P, "strir_p")
>  BU_FUTURE_OVERLOAD_1 (VSTRIL_P, "stril_p")
> +
> +BU_FUTURE_OVERLOAD_1 (XXSPLTIW, "xxspltiw")
> +BU_FUTURE_OVERLOAD_1 (XXSPLTID, "xxspltid")
> +BU_FUTURE_OVERLOAD_3 (XXSPLTI32DX, "xxsplti32dx")
>  
>  /* 1 argument crypto functions.  */
>  BU_CRYPTO_1 (VSBOX,          "vsbox",          CONST, crypto_vsbox_v2di)
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 092e6c1cc2c..e36aafaf71c 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5679,6 +5679,22 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
> 
> +  { FUTURE_BUILTIN_VEC_XXSPLTIW, FUTURE_BUILTIN_VXXSPLTIW_V4SI,
> +    RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0, 0 },
> +  { FUTURE_BUILTIN_VEC_XXSPLTIW, FUTURE_BUILTIN_VXXSPLTIW_V4SF,
> +    RS6000_BTI_V4SF, RS6000_BTI_float, 0, 0 },
> +
> +  { FUTURE_BUILTIN_VEC_XXSPLTID, FUTURE_BUILTIN_VXXSPLTID,
> +    RS6000_BTI_V2DF, RS6000_BTI_float, 0, 0 },
> +
> +  { FUTURE_BUILTIN_VEC_XXSPLTI32DX, FUTURE_BUILTIN_VXXSPLTI32DX_V4SI,
> +    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_UINTQI, RS6000_BTI_INTSI },
> +  { FUTURE_BUILTIN_VEC_XXSPLTI32DX, FUTURE_BUILTIN_VXXSPLTI32DX_V4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI,
> +    RS6000_BTI_UINTSI },
> +  { FUTURE_BUILTIN_VEC_XXSPLTI32DX, FUTURE_BUILTIN_VXXSPLTI32DX_V4SF,
> +    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_UINTQI, RS6000_BTI_float },
> +
>    { FUTURE_BUILTIN_VEC_SRDB, FUTURE_BUILTIN_VSRDB_V16QI,
>      RS6000_BTI_V16QI, RS6000_BTI_V16QI,
>      RS6000_BTI_V16QI, RS6000_BTI_UINTQI },
> @@ -13539,6 +13555,9 @@ builtin_function_type (machine_mode mode_ret, 
> machine_mode mode_arg0,
>      case ALTIVEC_BUILTIN_VSRH:
>      case ALTIVEC_BUILTIN_VSRW:
>      case P8V_BUILTIN_VSRD:
> +    /* Vector splat immediate insert */
> +    case FUTURE_BUILTIN_VXXSPLTI32DX_V4SI:
> +    case FUTURE_BUILTIN_VXXSPLTI32DX_V4SF:
>        h.uns_p[2] = 1;
>        break;
> 
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 5508484ba19..c6158874ce9 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -274,6 +274,7 @@ extern void rs6000_asm_output_dwarf_pcrel (FILE *file, 
> int size,
>                                          const char *label);
>  extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size,
>                                            const char *label);
> +extern long long rs6000_const_f32_to_i32 (rtx operand);
> 
>  /* Declare functions in rs6000-c.c */
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 58f5d780603..89fcc99df0a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -26494,6 +26494,17 @@ rs6000_cannot_substitute_mem_equiv_p (rtx mem)
>    return false;
>  }
> 
> +long long
> +rs6000_const_f32_to_i32 (rtx operand)
> +{
> +  long long value;
> +  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
> +
> +  gcc_assert (GET_MODE (operand) == SFmode);
> +  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
> +  return value;
> +}
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
> 
>  #include "gt-rs6000.h"
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 6926c866492..dfdffead903 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21148,6 +21148,41 @@ this instruction must be endian-aware.
> 
>  @findex vec_srdb
> 
> +Vector Splat
> +
> +@smallexample
> +@exdent vector signed int vec_splati (const signed int);
> +@exdent vector float vec_splati (const float);
> +@end smallexample
> +
> +Splat a 32-bit immediate into a vector of words.
> +
> +@findex vec_splati
> +
> +@smallexample
> +@exdent vector double vec_splatid (const float);
> +@end smallexample
> +
> +Convert a single precision floating-point value to double-precision and splat
> +the result to a vector of double-precision floats.
> +
> +@findex vec_splatid
> +
> +@smallexample
> +@exdent vector signed int vec_splati_ins (vector signed int,
> +const unsigned int, const signed int);
> +@exdent vector unsigned int vec_splati_ins (vector unsigned int,
> +const unsigned int, const unsigned int);
> +@exdent vector float vec_splati_ins (vector float, const unsigned int,
> +const float);
> +@end smallexample
> +
> +Argument 2 must be either 0 or 1.  Splat the value of argument 3 into the 
> word
> +identified by argument 2 of each doubleword of argument 1 and return the
> +result.  The other words of argument 1 are unchanged.
> +
> +@findex vec_splati_ins
> +
>  @smallexample
>  @exdent vector unsigned long long int
>  @exdent vec_pdep (vector unsigned long long int, vector unsigned long long 
> int)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
> new file mode 100644
> index 00000000000..f9fa55ae0d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
> @@ -0,0 +1,145 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_future_hw } */
> +/* { dg-options "-mdejagnu-cpu=future" } */
> +#include <altivec.h>
> +
> +#define DEBUG 0
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +extern void abort (void);
> +
> +int
> +main (int argc, char *argv [])
> +{
> +  int i;
> +  vector int vsrc_a_int;
> +  vector int vresult_int;
> +  vector int expected_vresult_int;
> +  int src_a_int = 13;
> +
> +  vector unsigned int vsrc_a_uint;
> +  vector unsigned int vresult_uint;
> +  vector unsigned int expected_vresult_uint;
> +  unsigned int src_a_uint = 7;
> +
> +  vector float vresult_f;
> +  vector float expected_vresult_f;
> +  vector float vsrc_a_f;
> +  float src_a_f = 23.0;
> +
> +  vector double vsrc_a_d;
> +  vector double vresult_d;
> +  vector double expected_vresult_d;
> + 
> +  /* Vector splati word */
> +  vresult_int = (vector signed int) { 1, 2, 3, 4 };
> +  expected_vresult_int = (vector signed int) { -13, -13, -13, -13 }; 
> +                                              
> +  vresult_int = vec_splati ( -13 );
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati (src_a_int)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +          i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  vresult_f = (vector float) { 1.0, 2.0, 3.0, 4.0 };
> +  expected_vresult_f = (vector float) { 23.0, 23.0, 23.0, 23.0 };
> +                                              
> +  vresult_f = vec_splati (23.0f);
> +
> +  if (!vec_all_eq (vresult_f,  expected_vresult_f)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati (src_a_f)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_f[%d] = %f, expected_vresult_f[%d] = %f\n",
> +          i, vresult_f[i], i, expected_vresult_f[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector splati double */
> +  vresult_d = (vector double) { 2.0, 3.0 };
> +  expected_vresult_d = (vector double) { -31.0, -31.0 };
> +                                              
> +  vresult_d = vec_splatid (-31.0f);
> +
> +  if (!vec_all_eq (vresult_d,  expected_vresult_d)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati (-31.0f)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_d[%i] = %f, expected_vresult_d[%i] = %f\n",
> +          i, vresult_d[i], i, expected_vresult_d[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector splat immediate */
> +  vsrc_a_int = (vector int) { 2, 3, 4, 5 };
> +  vresult_int = (vector int) { 1, 1, 1, 1 };
> +  expected_vresult_int = (vector int) { 2, 20, 4, 20 };
> +                                              
> +  vresult_int = vec_splati_ins (vsrc_a_int, 1, 20);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati_ins (vsrc_a_int, 1, 20)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%i] = %d, expected_vresult_int[%i] = %d\n",
> +          i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  vsrc_a_uint = (vector unsigned int) { 4, 5, 6, 7 };
> +  vresult_uint = (vector unsigned int) { 1, 1, 1, 1 };
> +  expected_vresult_uint = (vector unsigned int) { 4, 40, 6, 40 };
> +                                              
> +  vresult_uint = vec_splati_ins (vsrc_a_uint, 1, 40);
> +
> +  if (!vec_all_eq (vresult_uint,  expected_vresult_uint)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati_ins (vsrc_a_uint, 1, 40)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_uint[%i] = %d, expected_vresult_uint[%i] = %d\n",
> +          i, vresult_uint[i], i, expected_vresult_uint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  vsrc_a_f = (vector float) { 2.0, 3.0, 4.0, 5.0 };
> +  vresult_f = (vector float) { 1.0, 1.0, 1.0, 1.0 };
> +  expected_vresult_f = (vector float) { 2.0, 20.1, 4.0, 20.1 };
> +                                              
> +  vresult_f = vec_splati_ins (vsrc_a_f, 1, 20.1f);
> +
> +  if (!vec_all_eq (vresult_f,  expected_vresult_f)) {
> +#if DEBUG
> +    printf("ERROR, vec_splati_ins (vsrc_a_f, 1, 20.1)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_f[%i] = %f, expected_vresult_f[%i] = %f\n",
> +          i, vresult_f[i], i, expected_vresult_f[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times {\msplati\M} 6 } } */
> +/* { dg-final { scan-assembler-times {\msrdbi\M} 6 } } */
> +
> +

Re: [PATCH 5/6 ver 2] rs6000, Add vector splat builtin support

Reply via email to