On Mon, 2020-06-15 at 16:38 -0700, Carl Love via Gcc-patches wrote: > v2 changes: > > Updated ChangeLog per comments. > > define_expand "xxpermx", Updated implementation to use XOR
Unclear. Did you mean "Updated the implementation of xxpermx to use XOR"? > (icode == CODE_FOR_xxpermx, fix comments and check for 3-bit immediate > field. This entry is also unclear; please reword. > > gcc/doc/extend.texi: > comment "Maybe it should say it is related to vsel/xxsel, but per > bigger element?", added comment. I took the description directly > from spec. Don't really don't want to mess with the approved > description. > > fixed typo for Vector Permute Extendedextracth > > ---------- > > GCC maintainers: > > The following patch adds support for the vec_blendv and vec_permx > builtins. > > The patch has been compiled and tested on > > powerpc64le-unknown-linux-gnu (Power 9 LE) > > with no regression errors. > > The test cases were compiled on a Power 9 system and then tested on > Mambo. > > Carl Love > > --------------------------------------------------------------- > rs6000 RFC2609 vector blend, permute instructions > > gcc/ChangeLog > > 2020-06-15 Carl Love <c...@us.ibm.com> > > * config/rs6000/altivec.h (vec_blendv, vec_permx): Add define. > * config/rs6000/altivec.md (UNSPEC_XXBLEND, UNSPEC_XXPERMX.): New > unspecs. > (VM3): New define_mode. > (VM3_char): New define_attr. > (xxblend_<mode> mode VM3): New define_insn. > (xxpermx): New define_expand. > (xxpermx_inst): New define_insn. > * config/rs6000/rs6000-builtin.def (VXXBLEND_V16QI, VXXBLEND_V8HI, > VXXBLEND_V4SI, VXXBLEND_V2DI, VXXBLEND_V4SF, VXXBLEND_V2DF): New > BU_FUTURE_V_3 definitions. > (XXBLENDBU_FUTURE_OVERLOAD_3): New BU_FUTURE_OVERLOAD_3 definition. > (XXPERMX): New BU_FUTURE_OVERLOAD_4 definition. > * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): > (FUTURE_BUILTIN_VXXPERMX): Add if case support. > * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VXXBLEND_V16QI, > FUTURE_BUILTIN_VXXBLEND_V8HI, FUTURE_BUILTIN_VXXBLEND_V4SI, > FUTURE_BUILTIN_VXXBLEND_V2DI, FUTURE_BUILTIN_VXXBLEND_V4SF, > FUTURE_BUILTIN_VXXBLEND_V2DF, FUTURE_BUILTIN_VXXPERMX): Define > overloaded arguments. 
> (rs6000_expand_quaternop_builtin): Add if case for CODE_FOR_xxpermx. > (builtin_quaternary_function_type): Add v16uqi_type and xxpermx_type > variables, add case statement for FUTURE_BUILTIN_VXXPERMX. > (builtin_function_type)[FUTURE_BUILTIN_VXXBLEND_V16QI, > FUTURE_BUILTIN_VXXBLEND_V8HI, FUTURE_BUILTIN_VXXBLEND_V4SI, > FUTURE_BUILTIN_VXXBLEND_V2DI]: Add case statements. > * doc/extend.texi: Add documentation for vec_blendv and vec_permx. > tabs/spaces > gcc/testsuite/ChangeLog > > 2020-06-15 Carl Love <c...@us.ibm.com> > gcc.target/powerpc/vec-blend-runnable.c: New test. > gcc.target/powerpc/vec-permute-ext-runnable.c: New test. > --- > gcc/config/rs6000/altivec.h | 2 + > gcc/config/rs6000/altivec.md | 71 +++++ > gcc/config/rs6000/rs6000-builtin.def | 13 + > gcc/config/rs6000/rs6000-c.c | 25 +- > gcc/config/rs6000/rs6000-call.c | 94 ++++++ > gcc/doc/extend.texi | 63 ++++ > .../gcc.target/powerpc/vec-blend-runnable.c | 276 ++++++++++++++++ > .../powerpc/vec-permute-ext-runnable.c | 294 ++++++++++++++++++ > 8 files changed, 833 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c > create mode 100644 > gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c > > diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h > index 9ed41b1cbf1..1b532effebe 100644 > --- a/gcc/config/rs6000/altivec.h > +++ b/gcc/config/rs6000/altivec.h > @@ -708,6 +708,8 @@ __altivec_scalar_pred(vec_any_nle, > #define vec_splati(a) __builtin_vec_xxspltiw (a) > #define vec_splatid(a) __builtin_vec_xxspltid (a) > #define vec_splati_ins(a, b, c) __builtin_vec_xxsplti32dx (a, b, c) > +#define vec_blendv(a, b, c) __builtin_vec_xxblend (a, b, c) > +#define vec_permx(a, b, c, d) __builtin_vec_xxpermx (a, b, c, d) > > #define vec_gnb(a, b) __builtin_vec_gnb (a, b) > #define vec_clrl(a, b) __builtin_vec_clrl (a, b) ok > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 25f6b9b2f07..fd221bb21f6 
100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -176,6 +176,8 @@ > UNSPEC_XXSPLTIW > UNSPEC_XXSPLTID > UNSPEC_XXSPLTI32DX > + UNSPEC_XXBLEND > + UNSPEC_XXPERMX > ]) > > (define_c_enum "unspecv" > @@ -218,6 +220,21 @@ > (KF "FLOAT128_VECTOR_P (KFmode)") > (TF "FLOAT128_VECTOR_P (TFmode)")]) > > +;; Like VM2, just do char, short, int, long, float and double > +(define_mode_iterator VM3 [V4SI > + V8HI > + V16QI > + V4SF > + V2DF > + V2DI]) > + > +(define_mode_attr VM3_char [(V2DI "d") > + (V4SI "w") > + (V8HI "h") > + (V16QI "b") > + (V2DF "d") > + (V4SF "w")]) spaces/tabs > + > ;; Map the Vector convert single precision to double precision for integer > ;; versus floating point > (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")]) > @@ -908,6 +925,60 @@ > "xxsplti32dx %x0,%1,%2" > [(set_attr "type" "vecsimple")]) > > +(define_insn "xxblend_<mode>" > + [(set (match_operand:VM3 0 "register_operand" "=wa") > + (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa") > + (match_operand:VM3 2 "register_operand" "wa") > + (match_operand:VM3 3 "register_operand" "wa")] > + UNSPEC_XXBLEND))] > + "TARGET_FUTURE" > + "xxblendv<VM3_char> %x0,%x1,%x2,%x3" > + [(set_attr "type" "vecsimple")]) > + > +(define_expand "xxpermx" > + [(set (match_operand:V2DI 0 "register_operand" "+wa") > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa") > + (match_operand:V2DI 2 "register_operand" "wa") > + (match_operand:V16QI 3 "register_operand" "wa") > + (match_operand:QI 4 "u8bit_cint_operand" "n")] > + UNSPEC_XXPERMX))] > + "TARGET_FUTURE" > +{ > + if (BYTES_BIG_ENDIAN) > + emit_insn (gen_xxpermx_inst (operands[0], operands[1], > + operands[2], operands[3], > + operands[4])); > + else > + { > + /* Reverse value of byte element index eidx by XORing with 0xFF. > + Reverse the 32-byte section identifier match by subracting bits [0:2] > + of elemet from 7. 
*/ Typos in the comment above: "eidx", "elemet". > + int value = INTVAL (operands[4]); > + rtx vreg = gen_reg_rtx (V16QImode); > + > + emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1))); > + emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg)); > + value = 7 - value; > + emit_insn (gen_xxpermx_inst (operands[0], operands[2], > + operands[1], operands[3], > + GEN_INT (value))); > + } > + > + DONE; > +} > + [(set_attr "type" "vecsimple")]) > + > +(define_insn "xxpermx_inst" > + [(set (match_operand:V2DI 0 "register_operand" "+v") > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") > + (match_operand:V2DI 2 "register_operand" "v") > + (match_operand:V16QI 3 "register_operand" "v") > + (match_operand:QI 4 "u3bit_cint_operand" "n")] > + UNSPEC_XXPERMX))] > + "TARGET_FUTURE" > + "xxpermx %x0,%x1,%x2,%x3,%4" > + [(set_attr "type" "vecsimple")]) > + > (define_expand "vstrir_<mode>" > [(set (match_operand:VIshort 0 "altivec_register_operand") > (unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")] ok > diff --git a/gcc/config/rs6000/rs6000-builtin.def > b/gcc/config/rs6000/rs6000-builtin.def > index c85326de7f2..d1d04f013bb 100644 > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -2675,6 +2675,15 @@ BU_FUTURE_V_1 (VXXSPLTID, "vxxspltidp", CONST, > xxspltidp_v2df) > BU_FUTURE_V_3 (VXXSPLTI32DX_V4SI, "vxxsplti32dx_v4si", CONST, > xxsplti32dx_v4si) > BU_FUTURE_V_3 (VXXSPLTI32DX_V4SF, "vxxsplti32dx_v4sf", CONST, > xxsplti32dx_v4sf) > > +BU_FUTURE_V_3 (VXXBLEND_V16QI, "xxblend_v16qi", CONST, xxblend_v16qi) > +BU_FUTURE_V_3 (VXXBLEND_V8HI, "xxblend_v8hi", CONST, xxblend_v8hi) > +BU_FUTURE_V_3 (VXXBLEND_V4SI, "xxblend_v4si", CONST, xxblend_v4si) > +BU_FUTURE_V_3 (VXXBLEND_V2DI, "xxblend_v2di", CONST, xxblend_v2di) > +BU_FUTURE_V_3 (VXXBLEND_V4SF, "xxblend_v4sf", CONST, xxblend_v4sf) > +BU_FUTURE_V_3 (VXXBLEND_V2DF, "xxblend_v2df", CONST, xxblend_v2df) > + > +BU_FUTURE_V_4 (VXXPERMX, "xxpermx", CONST, xxpermx) > + > BU_FUTURE_V_1 
(VSTRIBR, "vstribr", CONST, vstrir_v16qi) > BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) > BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi) > @@ -2710,6 +2719,10 @@ BU_FUTURE_OVERLOAD_1 (VSTRIL_P, "stril_p") > BU_FUTURE_OVERLOAD_1 (XXSPLTIW, "xxspltiw") > BU_FUTURE_OVERLOAD_1 (XXSPLTID, "xxspltid") > BU_FUTURE_OVERLOAD_3 (XXSPLTI32DX, "xxsplti32dx") > + > +BU_FUTURE_OVERLOAD_3 (XXBLEND, "xxblend") > +BU_FUTURE_OVERLOAD_4 (XXPERMX, "xxpermx") > + > > /* 1 argument crypto functions. */ > BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox_v2di) ok > diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c > index 07ca33a89b4..88bbc52de6b 100644 > --- a/gcc/config/rs6000/rs6000-c.c > +++ b/gcc/config/rs6000/rs6000-c.c > @@ -1796,22 +1796,37 @@ altivec_resolve_overloaded_builtin (location_t loc, > tree fndecl, > unsupported_builtin = true; > } > } > - else if (fcode == FUTURE_BUILTIN_VEC_XXEVAL) > + else if ((fcode == FUTURE_BUILTIN_VEC_XXEVAL) > + || (fcode == FUTURE_BUILTIN_VXXPERMX)) > { > - /* Need to special case __builtin_vec_xxeval because this takes > + signed char op3_type; > + > + /* Need to special case these builins_xxeval because they takes Should this read "these builtins"? Typo: "builins". Also drop the 's' from "takes". > 4 arguments, and the existing infrastructure handles no > more than three. */ > if (nargs != 4) > { > - error ("builtin %qs requires 4 arguments", > - "__builtin_vec_xxeval"); > + if (fcode == FUTURE_BUILTIN_VEC_XXEVAL) > + error ("builtin %qs requires 4 arguments", > + "__builtin_vec_xxeval"); > + else > + error ("builtin %qs requires 4 arguments", > + "__builtin_vec_xxpermx"); > + > return error_mark_node; > } > + > + /* Set value for vec_xxpermx here as it it a constant. 
*/ Typo: "it it" should be "it is". > + op3_type = RS6000_BTI_V16QI; > + > for ( ; desc->code == fcode; desc++) > { > + if (fcode == FUTURE_BUILTIN_VEC_XXEVAL) > + op3_type = desc->op3; > + > if (rs6000_builtin_type_compatible (types[0], desc->op1) > && rs6000_builtin_type_compatible (types[1], desc->op2) > - && rs6000_builtin_type_compatible (types[2], desc->op3) > + && rs6000_builtin_type_compatible (types[2], op3_type) > && rs6000_builtin_type_compatible (types[3], > RS6000_BTI_UINTSI)) > { > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index e36aafaf71c..6770a7f05a2 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -5554,6 +5554,39 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, > > + /* The overloaded XXPERMX definitions are handled specially because the > + fourth unsigned char operand is not encoded in this table. 
*/ > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXPERMX, FUTURE_BUILTIN_VXXPERMX, > + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, > + RS6000_BTI_unsigned_V16QI }, > + > { FUTURE_BUILTIN_VEC_EXTRACTL, FUTURE_BUILTIN_VEXTRACTBL, > RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, > RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI }, > @@ -5695,6 +5728,37 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > { FUTURE_BUILTIN_VEC_XXSPLTI32DX, FUTURE_BUILTIN_VXXSPLTI32DX_V4SF, > RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_UINTQI, RS6000_BTI_float }, > 
> + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V16QI, > + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, > + RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V16QI, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V8HI, > + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, > + RS6000_BTI_unsigned_V8HI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V8HI, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, > + RS6000_BTI_unsigned_V4SI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, > + RS6000_BTI_unsigned_V2DI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V4SF, > + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, > + RS6000_BTI_unsigned_V4SI }, > + { FUTURE_BUILTIN_VEC_XXBLEND, FUTURE_BUILTIN_VXXBLEND_V2DF, > + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, > + RS6000_BTI_unsigned_V2DI }, > + > { FUTURE_BUILTIN_VEC_SRDB, FUTURE_BUILTIN_VSRDB_V16QI, > RS6000_BTI_V16QI, RS6000_BTI_V16QI, > RS6000_BTI_V16QI, RS6000_BTI_UINTQI }, > @@ -9911,6 +9975,18 @@ rs6000_expand_quaternop_builtin (enum insn_code icode, > tree exp, rtx target) > } > } > > + else if (icode == CODE_FOR_xxpermx) > + { > + /* Only allow 3-bit unsigned literals. 
*/ > + STRIP_NOPS (arg3); > + if (TREE_CODE (arg3) != INTEGER_CST > + || TREE_INT_CST_LOW (arg3) & ~0x7) > + { > + error ("argument 4 must be an 3-bit unsigned literal"); > + return CONST0_RTX (tmode); > + } > + } > + > if (target == 0 > || GET_MODE (target) != tmode > || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) > @@ -13293,12 +13369,17 @@ builtin_quaternary_function_type (machine_mode > mode_ret, > tree function_type = NULL; > > static tree v2udi_type = builtin_mode_to_type[V2DImode][1]; > + static tree v16uqi_type = builtin_mode_to_type[V16QImode][1]; > static tree uchar_type = builtin_mode_to_type[QImode][1]; > > static tree xxeval_type = > build_function_type_list (v2udi_type, v2udi_type, v2udi_type, > v2udi_type, uchar_type, NULL_TREE); > > + static tree xxpermx_type = > + build_function_type_list (v2udi_type, v2udi_type, v2udi_type, > + v16uqi_type, uchar_type, NULL_TREE); > + > switch (builtin) { > > case FUTURE_BUILTIN_XXEVAL: > @@ -13310,6 +13391,15 @@ builtin_quaternary_function_type (machine_mode > mode_ret, > function_type = xxeval_type; > break; > > + case FUTURE_BUILTIN_VXXPERMX: > + gcc_assert ((mode_ret == V2DImode) > + && (mode_arg0 == V2DImode) > + && (mode_arg1 == V2DImode) > + && (mode_arg2 == V16QImode) > + && (mode_arg3 == QImode)); > + function_type = xxpermx_type; > + break; > + > default: > /* A case for each quaternary built-in must be provided above. 
*/ > gcc_unreachable (); > @@ -13489,6 +13579,10 @@ builtin_function_type (machine_mode mode_ret, > machine_mode mode_arg0, > case FUTURE_BUILTIN_VREPLACE_ELT_UV2DI: > case FUTURE_BUILTIN_VREPLACE_UN_UV4SI: > case FUTURE_BUILTIN_VREPLACE_UN_UV2DI: > + case FUTURE_BUILTIN_VXXBLEND_V16QI: > + case FUTURE_BUILTIN_VXXBLEND_V8HI: > + case FUTURE_BUILTIN_VXXBLEND_V4SI: > + case FUTURE_BUILTIN_VXXBLEND_V2DI: > h.uns_p[0] = 1; > h.uns_p[1] = 1; > h.uns_p[2] = 1; ok > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index dfdffead903..4529d43a7cc 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -21183,6 +21183,69 @@ result. The other words of argument 1 are unchanged. > > @findex vec_splati_ins > > +Vector Blend Variable > + > +@smallexample > +@exdent vector signed char vec_blendv (vector signed char, vector signed > char, > +vector unsigned char); > +@exdent vector unsigned char vec_blendv (vector unsigned char, > +vector unsigned char, vector unsigned char); > +@exdent vector signed short vec_blendv (vector signed short, > +vector signed short, vector unsigned short); > +@exdent vector unsigned short vec_blendv (vector unsigned short, > +vector unsigned short, vector unsigned short); > +@exdent vector signed int vec_blendv (vector signed int, vector signed int, > +vector unsigned int); > +@exdent vector unsigned int vec_blendv (vector unsigned int, > +vector unsigned int, vector unsigned int); > +@exdent vector signed long long vec_blendv (vector signed long long, > +vector signed long long, vector unsigned long long); > +@exdent vector unsigned long long vec_blendv (vector unsigned long long, > +vector unsigned long long, vector unsigned long long); > +@exdent vector float vec_blendv (vector float, vector float, > +vector unsigned int); > +@exdent vector double vec_blendv (vector double, vector double, > +vector unsigned long long); > +@end smallexample > + > +Blend the first and second argument vectors according to the sign bits of the > 
+corresponding elements of the third argument vector. This is similar to the > +vsel and xxsel instructions but for bigger elements. ok > + > +@findex vec_blendv > + > +Vector Permute Extended > + > +@smallexample > +@exdent vector signed char vec_permx (vector signed char, vector signed char, > +vector unsigned char, const int); > +@exdent vector unsigned char vec_permx (vector unsigned char, > +vector unsigned char, vector unsigned char, const int); > +@exdent vector signed short vec_permx (vector signed short, > +vector signed short, vector unsigned char, const int); > +@exdent vector unsigned short vec_permx (vector unsigned short, > +vector unsigned short, vector unsigned char, const int); > +@exdent vector signed int vec_permx (vector signed int, vector signed int, > +vector unsigned char, const int); > +@exdent vector unsigned int vec_permx (vector unsigned int, > +vector unsigned int, vector unsigned char, const int); > +@exdent vector signed long long vec_permx (vector signed long long, > +vector signed long long, vector unsigned char, const int); > +@exdent vector unsigned long long vec_permx (vector unsigned long long, > +vector unsigned long long, vector unsigned char, const int); > +@exdent vector float (vector float, vector float, vector unsigned char, > +const int); > +@exdent vector double (vector double, vector double, vector unsigned char, > +const int); > +@end smallexample > + > +Perform a partial permute of the first two arguments, which form a 32-byte > +section of an emulated vector up to 256 bytes wide, using the partial permute > +control vector in the third argument. The fourth argument (constrained to > +values of 0-7) identifies which 32-byte section of the emulated vector is > +contained in the first two arguments. > +@findex vec_permx > + > @smallexample > @exdent vector unsigned long long int > @exdent vec_pdep (vector unsigned long long int, vector unsigned long long > int) ok. 
Thanks, -Will > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c > b/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c > new file mode 100644 > index 00000000000..70b25be3bcb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c > @@ -0,0 +1,276 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_future_hw } */ > +/* { dg-options "-mdejagnu-cpu=future" } */ > +#include <altivec.h> > + > +#define DEBUG 0 > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +extern void abort (void); > + > +int > +main (int argc, char *argv []) > +{ > + int i; > + vector signed char vsrc_a_char, vsrc_b_char; > + vector signed char vresult_char; > + vector signed char expected_vresult_char; > + > + vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar; > + vector unsigned char vresult_uchar; > + vector unsigned char expected_vresult_uchar; > + > + vector signed short vsrc_a_short, vsrc_b_short, vsrc_c_short; > + vector signed short vresult_short; > + vector signed short expected_vresult_short; > + > + vector unsigned short vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort; > + vector unsigned short vresult_ushort; > + vector unsigned short expected_vresult_ushort; > + > + vector int vsrc_a_int, vsrc_b_int, vsrc_c_int; > + vector int vresult_int; > + vector int expected_vresult_int; > + > + vector unsigned int vsrc_a_uint, vsrc_b_uint, vsrc_c_uint; > + vector unsigned int vresult_uint; > + vector unsigned int expected_vresult_uint; > + > + vector long long int vsrc_a_ll, vsrc_b_ll, vsrc_c_ll; > + vector long long int vresult_ll; > + vector long long int expected_vresult_ll; > + > + vector unsigned long long int vsrc_a_ull, vsrc_b_ull, vsrc_c_ull; > + vector unsigned long long int vresult_ull; > + vector unsigned long long int expected_vresult_ull; > + > + vector float vresult_f; > + vector float expected_vresult_f; > + vector float vsrc_a_f, vsrc_b_f; > + > + vector double vsrc_a_d, vsrc_b_d; > + vector double 
vresult_d; > + vector double expected_vresult_d; > + > + /* Vector blend */ > + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, > + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; > + > + vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15, > + 17, 19, 21, 23, 25, 27, 29 }; > + vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16, > + 18, 20, 22, 24, 26, 28, 30, 32 }; > + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, > + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; > + vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_char = (vector signed char) { -1, -4, 5, 8, > + 9, 12, 13, 16, > + 17, 20, 21, 24, > + 25, 28, 29, 32 }; > + > + vresult_char = vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar); > + > + if (!vec_all_eq (vresult_char, expected_vresult_char)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n"); > + for(i = 0; i < 16; i++) > + printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n", > + i, vresult_char[i], i, expected_vresult_char[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15, > + 17, 19, 21, 23, 25, 27, 29 }; > + vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16, > + 18, 20, 22, 24, 26, 28, 30, 32 }; > + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, > + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; > + vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_uchar = (vector unsigned char) { 1, 4, 5, 8, > + 9, 12, 13, 16, > + 17, 20, 21, 24, > + 25, 28, 29, 32 }; > + > + vresult_uchar = vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar); > + > + if (!vec_all_eq (vresult_uchar, expected_vresult_uchar)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n"); > + for(i = 0; i 
< 16; i++) > + printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n", > + i, vresult_uchar[i], i, expected_vresult_uchar[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_short = (vector signed short) { -1, 3, 5, 7, 9, 11, 13, 15 }; > + vsrc_b_short = (vector signed short) { 2, -4, 6, 8, 10, 12, 14, 16 }; > + vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000, > + 0, 0x8000, 0, 0x8000 }; > + vresult_short = (vector signed short) { 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_short = (vector signed short) { -1, -4, 5, 8, > + 9, 12, 13, 16 }; > + > + vresult_short = vec_blendv (vsrc_a_short, vsrc_b_short, vsrc_c_ushort); > + > + if (!vec_all_eq (vresult_short, expected_vresult_short)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_short, vsrc_b_short, > vsrc_c_ushort)\n"); > + for(i = 0; i < 8; i++) > + printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n", > + i, vresult_short[i], i, expected_vresult_short[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ushort = (vector unsigned short) { 1, 3, 5, 7, 9, 11, 13, 15 }; > + vsrc_b_ushort = (vector unsigned short) { 2, 4, 6, 8, 10, 12, 14, 16 }; > + vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000, > + 0, 0x8000, 0, 0x8000 }; > + vresult_ushort = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_ushort = (vector unsigned short) { 1, 4, 5, 8, > + 9, 12, 13, 16 }; > + > + vresult_ushort = vec_blendv (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort); > + > + if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_ushort, vsrc_b_ushort, > vsrc_c_ushort)\n"); > + for(i = 0; i < 8; i++) > + printf(" vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n", > + i, vresult_ushort[i], i, expected_vresult_ushort[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_int = (vector signed int) { -1, -3, -5, -7 }; > + vsrc_b_int = (vector signed int) { 2, 4, 6, 8 }; > + 
vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000}; > + vresult_int = (vector signed int) { 0, 0, 0, 0 }; > + expected_vresult_int = (vector signed int) { -1, 4, -5, 8 }; > + > + vresult_int = vec_blendv (vsrc_a_int, vsrc_b_int, vsrc_c_uint); > + > + if (!vec_all_eq (vresult_int, expected_vresult_int)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_int, vsrc_b_int, vsrc_c_uint)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", > + i, vresult_int[i], i, expected_vresult_int[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 }; > + vsrc_b_uint = (vector unsigned int) { 2, 4, 6, 8 }; > + vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000 }; > + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; > + expected_vresult_uint = (vector unsigned int) { 1, 4, 5, 8 }; > + > + vresult_uint = vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint); > + > + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", > + i, vresult_uint[i], i, expected_vresult_uint[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ll = (vector signed long long int) { -1, -3 }; > + vsrc_b_ll = (vector signed long long int) { 2, 4, }; > + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; > + vresult_ll = (vector signed long long int) { 0, 0 }; > + expected_vresult_ll = (vector signed long long int) { -1, 4 }; > + > + vresult_ll = vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull); > + > + if (!vec_all_eq (vresult_ll, expected_vresult_ll)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ll[%d] = %d, expected_vresult_ll[%d] = %d\n", > + i, vresult_ll[i], i, 
expected_vresult_ll[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ull = (vector unsigned long long) { 1, 3 }; > + vsrc_b_ull = (vector unsigned long long) { 2, 4 }; > + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; > + vresult_ull = (vector unsigned long long) { 0, 0 }; > + expected_vresult_ull = (vector unsigned long long) { 1, 4 }; > + > + vresult_ull = vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull); > + > + if (!vec_all_eq (vresult_ull, expected_vresult_ull)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ull[%d] = %d, expected_vresult_ull[%d] = %d\n", > + i, vresult_ull[i], i, expected_vresult_ull[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_f = (vector float) { -1.0, -3.0, -5.0, -7.0 }; > + vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 }; > + vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000}; > + vresult_f = (vector float) { 0, 0, 0, 0 }; > + expected_vresult_f = (vector float) { -1, 4, -5, 8 }; > + > + vresult_f = vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint); > + > + if (!vec_all_eq (vresult_f, expected_vresult_f)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_f[%d] = %d, expected_vresult_f[%d] = %d\n", > + i, vresult_f[i], i, expected_vresult_f[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_d = (vector double) { -1.0, -3.0 }; > + vsrc_b_d = (vector double) { 2.0, 4.0 }; > + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; > + vresult_d = (vector double) { 0, 0 }; > + expected_vresult_d = (vector double) { -1, 4 }; > + > + vresult_d = vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull); > + > + if (!vec_all_eq (vresult_d, expected_vresult_d)) { > +#if DEBUG > + printf("ERROR, vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_d[%d] = %d, 
expected_vresult_d[%d] = %d\n", > + i, vresult_d[i], i, expected_vresult_d[i]); > +#else > + abort(); > +#endif > + } > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {\msplati\M} 6 } } */ > +/* { dg-final { scan-assembler-times {\msrdbi\M} 6 } } */ > + > + > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c > b/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c > new file mode 100644 > index 00000000000..f5d223d0530 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c > @@ -0,0 +1,294 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_future_hw } */ > +/* { dg-options "-mdejagnu-cpu=future" } */ > +#include <altivec.h> > + > +#define DEBUG 1 > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +extern void abort (void); > + > +int > +main (int argc, char *argv []) > +{ > + int i; > + vector signed char vsrc_a_char, vsrc_b_char; > + vector signed char vresult_char; > + vector signed char expected_vresult_char; > + > + vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar; > + vector unsigned char vresult_uchar; > + vector unsigned char expected_vresult_uchar; > + > + vector signed short vsrc_a_short, vsrc_b_short, vsrc_c_short; > + vector signed short vresult_short; > + vector signed short expected_vresult_short; > + > + vector unsigned short vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort; > + vector unsigned short vresult_ushort; > + vector unsigned short expected_vresult_ushort; > + > + vector int vsrc_a_int, vsrc_b_int, vsrc_c_int; > + vector int vresult_int; > + vector int expected_vresult_int; > + > + vector unsigned int vsrc_a_uint, vsrc_b_uint, vsrc_c_uint; > + vector unsigned int vresult_uint; > + vector unsigned int expected_vresult_uint; > + > + vector long long int vsrc_a_ll, vsrc_b_ll, vsrc_c_ll; > + vector long long int vresult_ll; > + vector long long int expected_vresult_ll; > + > + vector unsigned long long int vsrc_a_ull, vsrc_b_ull, 
vsrc_c_ull; > + vector unsigned long long int vresult_ull; > + vector unsigned long long int expected_vresult_ull; > + > + vector float vresult_f; > + vector float expected_vresult_f; > + vector float vsrc_a_f, vsrc_b_f; > + > + vector double vsrc_a_d, vsrc_b_d; > + vector double vresult_d; > + vector double expected_vresult_d; > + > + /* Vector permx */ > + vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15, > + 17, 19, 21, 23, 25, 27, 29 }; > + vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16, > + 18, 20, 22, 24, 26, 28, 30, 32 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1, > + 0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 }; > + vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_char = (vector signed char) { -1, 15, -1, 11, > + -1, 7, -1, 3, > + -1, 5, -1, 9, > + -1, 13, -1, -1 }; > + > + vresult_char = vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_char, expected_vresult_char)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n"); > + for(i = 0; i < 16; i++) > + printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n", > + i, vresult_char[i], i, expected_vresult_char[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15, > + 17, 19, 21, 23, 25, 27, 29 }; > + vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16, > + 18, 20, 22, 24, 26, 28, 30, 32 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1, > + 0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 }; > + vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_uchar = (vector unsigned char) { 1, 15, 1, 11, > + 1, 7, 1, 3, > + 1, 5, 1, 9, > + 1, 13, 1, 1 }; > + > + vresult_uchar = vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar, 0); > + > + if (!vec_all_eq 
(vresult_uchar, expected_vresult_uchar)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n"); > + for(i = 0; i < 16; i++) > + printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n", > + i, vresult_uchar[i], i, expected_vresult_uchar[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_short = (vector signed short int) { 1, -3, 5, 7, 9, 11, 13, 15 }; > + vsrc_b_short = (vector signed short int) { 2, 4, -6, 8, 10, 12, 14, 16 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x2, 0x3, > + 0x8, 0x9, 0x2, 0x3, > + 0x1E, 0x1F, 0x2, 0x3 }; > + vresult_short = (vector signed short int) { 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_short = (vector signed short int) { 1, -3, 5, -3, > + 9, -3, 16, -3 }; > + > + vresult_short = vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_short, expected_vresult_short)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar)\n"); > + for(i = 0; i < 8; i++) > + printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n", > + i, vresult_short[i], i, expected_vresult_short[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ushort = (vector unsigned short int) { 1, 3, 5, 7, 9, 11, 13, 15 }; > + vsrc_b_ushort = (vector unsigned short int) { 2, 4, 6, 8, 10, 12, 14, 16 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x2, 0x3, > + 0x8, 0x9, 0x2, 0x3, > + 0x1E, 0x1F, 0x2, 0x3 }; > + vresult_ushort = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 }; > + expected_vresult_ushort = (vector unsigned short int) { 1, 3, 5, 3, > + 9, 3, 16, 3 }; > + > + vresult_ushort = vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar)\n"); > + for(i = 0; i < 8; i++) > + printf(" 
vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n", > + i, vresult_ushort[i], i, expected_vresult_ushort[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_int = (vector signed int) { 1, -3, 5, 7 }; > + vsrc_b_int = (vector signed int) { 2, 4, -6, 8 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x18, 0x19, 0x1A, 0x1B, > + 0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_int = (vector signed int) { 0, 0, 0, 0 }; > + expected_vresult_int = (vector signed int) { 1, -3, -6, 8 }; > + > + vresult_int = vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_int, expected_vresult_int)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", > + i, vresult_int[i], i, expected_vresult_int[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 }; > + vsrc_b_uint = (vector unsigned int) { 10, 12, 14, 16 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x18, 0x19, 0x1A, 0x1B, > + 0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; > + expected_vresult_uint = (vector unsigned int) { 1, 3, 14, 16 }; > + > + vresult_uint = vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", > + i, vresult_uint[i], i, expected_vresult_uint[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ll = (vector signed long long int) { 1, -3 }; > + vsrc_b_ll = (vector signed long long int) { 2, -4 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x18, 0x19, 0x1A, 0x1B, > + 
0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_ll = (vector signed long long int) { 0, 0}; > + expected_vresult_ll = (vector signed long long int) { 1, -4 }; > + > + vresult_ll = vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_ll, expected_vresult_ll)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ll[%d] = %lld, expected_vresult_ll[%d] = %lld\n", > + i, vresult_ll[i], i, expected_vresult_ll[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_ull = (vector unsigned long long int) { 1, 3 }; > + vsrc_b_ull = (vector unsigned long long int) { 10, 12 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x18, 0x19, 0x1A, 0x1B, > + 0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_ull = (vector unsigned long long int) { 0, 0 }; > + expected_vresult_ull = (vector unsigned long long int) { 1, 12 }; > + > + vresult_ull = vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_ull, expected_vresult_ull)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ull[%d] = %d, expected_vresult_ull[%d] = %d\n", > + i, vresult_ull[i], i, expected_vresult_ull[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_f = (vector float) { -3.0, 5.0, 7.0, 9.0 }; > + vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x18, 0x19, 0x1A, 0x1B, > + 0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_f = (vector float) { 0.0, 0.0, 0.0, 0.0 }; > + expected_vresult_f = (vector float) { -3.0, 5.0, 6.0, 8.0 }; > + > + vresult_f = vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_f, expected_vresult_f)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar)\n"); > + for(i = 0; i < 4; i++) > + printf(" 
vresult_f[%d] = %f, expected_vresult_f[%d] = %f\n", > + i, vresult_f[i], i, expected_vresult_f[i]); > +#else > + abort(); > +#endif > + } > + > + vsrc_a_d = (vector double) { 1.0, -3.0 }; > + vsrc_b_d = (vector double) { 2.0, -4.0 }; > + vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3, > + 0x4, 0x5, 0x6, 0x7, > + 0x1A, 0x1B, 0x1C, 0x1B, > + 0x1C, 0x1D, 0x1E, 0x1F }; > + vresult_d = (vector double) { 0.0, 0.0 }; > + expected_vresult_d = (vector double) { 1.0, -4.0 }; > + > + vresult_d = vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar, 0); > + > + if (!vec_all_eq (vresult_d, expected_vresult_d)) { > +#if DEBUG > + printf("ERROR, vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_d[%d] = %f, expected_vresult_d[%d] = %f\n", > + i, vresult_d[i], i, expected_vresult_d[i]); > +#else > + abort(); > +#endif > + } > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {\mxxpermx\M} 6 } } */ > + > +