This patch should address the comments in the last patch. I have tested this patch with bootstrap builds and make check regression tests on a little endian Power8 64-bit system and a big endian Power7 32/64-bit system with no regressions. Can I check this into the trunk?
[gcc] 2016-12-13 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000.c (rs6000_split_vec_extract_var): On ISA 3.0/power9, add support to use the VEXTU{B,H,W}{L,R}X extract instructions. * config/rs6000/vsx.md (VSr2): Add IEEE 128-bit floating point type constraint registers. (VSr3): Likewise. (FL_CONV): New mode iterator for binary floating types that have a direct conversion from 64-bit integer to floating point. (vsx_extract_<mode>_p9): Add support for the ISA 3.0/power9 VEXTU{B,H,W}{L,R}X extract instructions. (vsx_extract_<mode>_p9 splitter): Add splitter to load up the extract byte position into the GPR if we are using the VEXTU{B,H,W}{L,R}X extract instructions. (vsx_extract_<mode>_di_p9): Support extracts to GPRs. (vsx_extract_<mode>_store_p9): Support extracting to GPRs so that we can use reg+offset address instructions. (vsx_extract_<mode>_var): Support extracts to GPRs. (vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var): New combiner insn to combine vector extracts with zero_extend. (vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>): Optimize extracting a small integer vector element and converting it to a floating point type. (vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>): Likewise. [gcc/testsuite] 2016-12-13 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc/testsuite/gcc.target/powerpc/vec-extract.h: If DO_TRACE is defined, add tracing of the various extracts to stderr. Add support for tests that convert the result to another type. * gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c: Add new tests that do an extract and then convert the values to double. * gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c: Likewise. 
* gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c: Likewise. * gcc.target/powerpc/p9-extract-1.c: Update test to check for VEXTU{B,H,W}{L,R}X instructions being generated by default instead of VEXTRACTU{B,H} and XXEXTRACTUW. * gcc.target/powerpc/p9-extract-3.c: New test for combination of vec_extract and convert to floating point. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -7519,6 +7519,53 @@ rs6000_split_vec_extract_var (rtx dest, { int bit_shift = byte_shift + 3; rtx element2; + int dest_regno = regno_or_subregno (dest); + int src_regno = regno_or_subregno (src); + int element_regno = regno_or_subregno (element); + + /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in + a general purpose register. */ + if (TARGET_P9_VECTOR + && (mode == V16QImode || mode == V8HImode || mode == V4SImode) + && INT_REGNO_P (dest_regno) + && ALTIVEC_REGNO_P (src_regno) + && INT_REGNO_P (element_regno)) + { + rtx dest_si = gen_rtx_REG (SImode, dest_regno); + rtx element_si = gen_rtx_REG (SImode, element_regno); + + if (mode == V16QImode) + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextublx (dest_si, element_si, src) + : gen_vextubrx (dest_si, element_si, src)); + + else if (mode == V8HImode) + { + rtx tmp_gpr_si = (GET_CODE (tmp_gpr) == SCRATCH + ? dest_si + : gen_rtx_REG (SImode, REGNO (tmp_gpr))); + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextuhlx (dest_si, tmp_gpr_si, src) + : gen_vextuhrx (dest_si, tmp_gpr_si, src)); + } + + + else + { + rtx tmp_gpr_si = (GET_CODE (tmp_gpr) == SCRATCH + ? dest_si + : gen_rtx_REG (SImode, REGNO (tmp_gpr))); + /* Word elements are 4 bytes wide, so scale the index by 4. */ + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? 
gen_vextuwlx (dest_si, tmp_gpr_si, src) + : gen_vextuwrx (dest_si, tmp_gpr_si, src)); + } + + return; + } + gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec)); Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -119,13 +119,17 @@ (define_mode_attr VSr2 [(V2DF "wd") (V4SF "wf") (DF "ws") (SF "ww") - (DI "wi")]) + (DI "wi") + (KF "wq") + (TF "wp")]) (define_mode_attr VSr3 [(V2DF "wa") (V4SF "wa") (DF "ws") (SF "ww") - (DI "wi")]) + (DI "wi") + (KF "wq") + (TF "wp")]) ;; Map the register class for sp<->dp float conversions, destination (define_mode_attr VSr4 [(SF "ws") @@ -298,6 +302,14 @@ (define_mode_iterator VSX_EXTRACT_FL [SF || (FLOAT128_IEEE_P (TFmode) && TARGET_FLOAT128_HW)")]) +;; Mode iterator for binary floating types that have a direct conversion +;; from 64-bit integer to floating point +(define_mode_iterator FL_CONV [SF + DF + (KF "TARGET_FLOAT128_HW") + (TF "TARGET_FLOAT128_HW + && FLOAT128_IEEE_P (TFmode)")]) + ;; Iterator for the 2 short vector types to do a splat from an integer (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) @@ -2535,63 +2547,98 @@ (define_expand "vsx_extract_<mode>" }) (define_insn "vsx_extract_<mode>_p9" - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>") + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") (vec_select:<VS_scalar> - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") - (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) + (clobber (match_scratch:SI 3 "=r,X"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" { - HOST_WIDE_INT elt = INTVAL (operands[2]); - 
HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG - ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt - : elt); - - HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); - HOST_WIDE_INT offset = unit_size * elt_adj; - - operands[2] = GEN_INT (offset); - if (unit_size == 4) - return "xxextractuw %x0,%x1,%2"; + if (which_alternative == 0) + return "#"; + else - return "vextractu<wd> %0,%1,%2"; + { + HOST_WIDE_INT elt = INTVAL (operands[2]); + HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG + ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt + : elt); + + HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); + HOST_WIDE_INT offset = unit_size * elt_adj; + + operands[2] = GEN_INT (offset); + if (unit_size == 4) + return "xxextractuw %x0,%x1,%2"; + else + return "vextractu<wd> %0,%1,%2"; + } } [(set_attr "type" "vecsimple")]) +(define_split + [(set (match_operand:<VS_scalar> 0 "int_reg_operand") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") + (parallel [(match_operand:QI 2 "const_int_operand")]))) + (clobber (match_operand:SI 3 "int_reg_operand"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER && reload_completed" + [(const_int 0)] +{ + rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); + + emit_move_insn (op3, GEN_INT (offset)); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); + else + emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); + DONE; +}) + ;; Optimize zero extracts to eliminate the AND after the extract. 
(define_insn_and_split "*vsx_extract_<mode>_di_p9" - [(set (match_operand:DI 0 "gpc_reg_operand" "=<VSX_EX>") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") (zero_extend:DI (vec_select:<VS_scalar> - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") - (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) + (clobber (match_scratch:SI 3 "=r,X"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" - [(set (match_dup 3) - (vec_select:<VS_scalar> - (match_dup 1) - (parallel [(match_dup 2)])))] + [(parallel [(set (match_dup 4) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 3))])] { - operands[3] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); + operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); }) ;; Optimize stores to use the ISA 3.0 scalar store instructions (define_insn_and_split "*vsx_extract_<mode>_store_p9" - [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z") + [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m") (vec_select:<VS_scalar> - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") - (parallel [(match_operand:QI 2 "const_int_operand" "n")]))) - (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>"))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,<VSX_EX>") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) + (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r")) + (clobber (match_scratch:SI 4 "=X,&r"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" - [(set (match_dup 3) - (vec_select:<VS_scalar> - (match_dup 1) - (parallel [(match_dup 2)]))) + [(parallel [(set (match_dup 3) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 
4))]) (set (match_dup 0) (match_dup 3))]) @@ -2721,13 +2768,13 @@ (define_insn_and_split "*vsx_extract_<mo ;; Variable V16QI/V8HI/V4SI extract (define_insn_and_split "vsx_extract_<mode>_var" - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r") + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") (unspec:<VS_scalar> - [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r")] + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r,&b")) - (clobber (match_scratch:V2DI 4 "=&v,X"))] + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" @@ -2738,6 +2785,27 @@ (define_insn_and_split "vsx_extract_<mod DONE; }) +(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var" + [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r") + (zero_extend:SDI + (unspec:<VSX_EXTRACT_I:VS_scalar> + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT))) + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + machine_mode smode = <VSX_EXTRACT_I:MODE>mode; + rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), + operands[1], operands[2], + operands[3], operands[4]); + DONE; +}) + ;; VSX_EXTRACT optimizations ;; Optimize double d = (double) vec_extract (vi, <n>) ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP @@ -2839,6 +2907,56 @@ (define_insn_and_split "*vsx_extract_si_ DONE; }) +;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>) +;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are 
IEEE +;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, +;; vector short or vector unsigned short. +(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") + (float:FL_CONV + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 4) + (sign_extend:DI (match_dup 3))) + (set (match_dup 0) + (float:<FL_CONV:MODE> (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") + (unsigned_float:FL_CONV + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 0) + (float:<FL_CONV:MODE> (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + ;; V4SI/V8HI/V16QI set operation on ISA 3.0 (define_insn "vsx_set_<mode>_p9" [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") Index: 
gcc/testsuite/gcc.target/powerpc/vec-extract.h =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract.h (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 243590) +++ gcc/testsuite/gcc.target/powerpc/vec-extract.h (.../gcc/testsuite/gcc.target/powerpc) (working copy) @@ -2,16 +2,53 @@ #include <stddef.h> #include <altivec.h> +#ifndef RTYPE +#define RTYPE TYPE +#endif + +#ifdef DO_TRACE +#include <stdio.h> + +#define TRACE(STRING, NUM) \ +do \ + { \ + fprintf (stderr, "%s%s: %2d\n", (NUM == 0) ? "\n" : "", \ + STRING, (int)NUM); \ + fflush (stderr); \ + } \ +while (0) + +#ifndef FAIL_FORMAT +#define FAIL_FORMAT "%ld" +#define FAIL_CAST(X) ((long)(X)) +#endif + +#define FAIL(EXP, GOT) \ +do \ + { \ + fprintf (stderr, "Expected: " FAIL_FORMAT ", got " FAIL_FORMAT "\n", \ + FAIL_CAST (EXP), FAIL_CAST (GOT)); \ + fflush (stderr); \ + abort (); \ + } \ +while (0) + +#else +#define TRACE(STRING, NUM) +#define FAIL(EXP, GOT) abort () +#endif + +static void check (RTYPE, RTYPE) __attribute__((__noinline__)); +static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__)); +static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__)); + static void -check (TYPE expected, TYPE got) +check (RTYPE expected, RTYPE got) { if (expected != got) - abort (); + FAIL (expected, got); } -static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__)); -static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__)); - static vector TYPE deoptimize (vector TYPE a) { @@ -29,116 +66,116 @@ deoptimize_ptr (vector TYPE *p) /* Tests for the normal case of vec_extract where the vector is in a register and returning the result in a register as a return value. 
*/ -TYPE +RTYPE get_auto_n (vector TYPE a, ssize_t n) { - return vec_extract (a, n); + return (RTYPE) vec_extract (a, n); } -TYPE +RTYPE get_auto_0 (vector TYPE a) { - return vec_extract (a, 0); + return (RTYPE) vec_extract (a, 0); } -TYPE +RTYPE get_auto_1 (vector TYPE a) { - return vec_extract (a, 1); + return (RTYPE) vec_extract (a, 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_auto_2 (vector TYPE a) { - return vec_extract (a, 2); + return (RTYPE) vec_extract (a, 2); } -TYPE +RTYPE get_auto_3 (vector TYPE a) { - return vec_extract (a, 3); + return (RTYPE) vec_extract (a, 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_auto_4 (vector TYPE a) { - return vec_extract (a, 4); + return (RTYPE) vec_extract (a, 4); } -TYPE +RTYPE get_auto_5 (vector TYPE a) { - return vec_extract (a, 5); + return (RTYPE) vec_extract (a, 5); } -TYPE +RTYPE get_auto_6 (vector TYPE a) { - return vec_extract (a, 6); + return (RTYPE) vec_extract (a, 6); } -TYPE +RTYPE get_auto_7 (vector TYPE a) { - return vec_extract (a, 7); + return (RTYPE) vec_extract (a, 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_auto_8 (vector TYPE a) { - return vec_extract (a, 8); + return (RTYPE) vec_extract (a, 8); } -TYPE +RTYPE get_auto_9 (vector TYPE a) { - return vec_extract (a, 9); + return (RTYPE) vec_extract (a, 9); } -TYPE +RTYPE get_auto_10 (vector TYPE a) { - return vec_extract (a, 10); + return (RTYPE) vec_extract (a, 10); } -TYPE +RTYPE get_auto_11 (vector TYPE a) { - return vec_extract (a, 11); + return (RTYPE) vec_extract (a, 11); } -TYPE +RTYPE get_auto_12 (vector TYPE a) { - return vec_extract (a, 12); + return (RTYPE) vec_extract (a, 12); } -TYPE +RTYPE get_auto_13 (vector TYPE a) { - return vec_extract (a, 13); + return (RTYPE) vec_extract (a, 13); } -TYPE +RTYPE get_auto_14 (vector TYPE a) { - return vec_extract (a, 14); + return (RTYPE) vec_extract (a, 14); } -TYPE +RTYPE get_auto_15 (vector TYPE a) { - return vec_extract (a, 15); + return (RTYPE) vec_extract (a, 15); } #endif #endif #endif -typedef TYPE 
(*auto_func_type) (vector TYPE); +typedef RTYPE (*auto_func_type) (vector TYPE); static auto_func_type get_auto_const[] = { get_auto_0, @@ -173,7 +210,10 @@ do_auto (vector TYPE a) size_t i; for (i = 0; i < sizeof (get_auto_const) / sizeof (get_auto_const[0]); i++) - check (get_auto_n (a, i), (get_auto_const[i]) (a)); + { + TRACE ("auto", i); + check (get_auto_n (a, i), (get_auto_const[i]) (a)); + } } @@ -182,115 +222,115 @@ do_auto (vector TYPE a) in the right position to use a scalar store). */ void -get_store_n (TYPE *p, vector TYPE a, ssize_t n) +get_store_n (RTYPE *p, vector TYPE a, ssize_t n) { - *p = vec_extract (a, n); + *p = (RTYPE) vec_extract (a, n); } void -get_store_0 (TYPE *p, vector TYPE a) +get_store_0 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 0); + *p = (RTYPE) vec_extract (a, 0); } void -get_store_1 (TYPE *p, vector TYPE a) +get_store_1 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 1); + *p = (RTYPE) vec_extract (a, 1); } #if ELEMENTS >= 4 void -get_store_2 (TYPE *p, vector TYPE a) +get_store_2 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 2); + *p = (RTYPE) vec_extract (a, 2); } void -get_store_3 (TYPE *p, vector TYPE a) +get_store_3 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 3); + *p = (RTYPE) vec_extract (a, 3); } #if ELEMENTS >= 8 void -get_store_4 (TYPE *p, vector TYPE a) +get_store_4 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 4); + *p = (RTYPE) vec_extract (a, 4); } void -get_store_5 (TYPE *p, vector TYPE a) +get_store_5 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 5); + *p = (RTYPE) vec_extract (a, 5); } void -get_store_6 (TYPE *p, vector TYPE a) +get_store_6 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 6); + *p = (RTYPE) vec_extract (a, 6); } void -get_store_7 (TYPE *p, vector TYPE a) +get_store_7 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 7); + *p = (RTYPE) vec_extract (a, 7); } #if ELEMENTS >= 16 void -get_store_8 (TYPE *p, vector TYPE a) +get_store_8 (RTYPE *p, vector TYPE a) { - *p 
= vec_extract (a, 8); + *p = (RTYPE) vec_extract (a, 8); } void -get_store_9 (TYPE *p, vector TYPE a) +get_store_9 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 9); + *p = (RTYPE) vec_extract (a, 9); } void -get_store_10 (TYPE *p, vector TYPE a) +get_store_10 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 10); + *p = (RTYPE) vec_extract (a, 10); } void -get_store_11 (TYPE *p, vector TYPE a) +get_store_11 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 11); + *p = (RTYPE) vec_extract (a, 11); } void -get_store_12 (TYPE *p, vector TYPE a) +get_store_12 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 12); + *p = (RTYPE) vec_extract (a, 12); } void -get_store_13 (TYPE *p, vector TYPE a) +get_store_13 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 13); + *p = (RTYPE) vec_extract (a, 13); } void -get_store_14 (TYPE *p, vector TYPE a) +get_store_14 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 14); + *p = (RTYPE) vec_extract (a, 14); } void -get_store_15 (TYPE *p, vector TYPE a) +get_store_15 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 15); + *p = (RTYPE) vec_extract (a, 15); } #endif #endif #endif -typedef void (*store_func_type) (TYPE *, vector TYPE); +typedef void (*store_func_type) (RTYPE *, vector TYPE); static store_func_type get_store_const[] = { get_store_0, @@ -323,10 +363,11 @@ void do_store (vector TYPE a) { size_t i; - TYPE result_var, result_const; + RTYPE result_var, result_const; for (i = 0; i < sizeof (get_store_const) / sizeof (get_store_const[0]); i++) { + TRACE ("store", i); get_store_n (&result_var, a, i); (get_store_const[i]) (&result_const, a); check (result_var, result_const); @@ -337,116 +378,116 @@ do_store (vector TYPE a) /* Tests for vec_extract where the vector comes from memory (the compiler can optimize this by doing a scalar load without having to load the whole vector). 
*/ -TYPE +RTYPE get_pointer_n (vector TYPE *p, ssize_t n) { - return vec_extract (*p, n); + return (RTYPE) vec_extract (*p, n); } -TYPE +RTYPE get_pointer_0 (vector TYPE *p) { - return vec_extract (*p, 0); + return (RTYPE) vec_extract (*p, 0); } -TYPE +RTYPE get_pointer_1 (vector TYPE *p) { - return vec_extract (*p, 1); + return (RTYPE) vec_extract (*p, 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_pointer_2 (vector TYPE *p) { - return vec_extract (*p, 2); + return (RTYPE) vec_extract (*p, 2); } -TYPE +RTYPE get_pointer_3 (vector TYPE *p) { - return vec_extract (*p, 3); + return (RTYPE) vec_extract (*p, 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_pointer_4 (vector TYPE *p) { - return vec_extract (*p, 4); + return (RTYPE) vec_extract (*p, 4); } -static TYPE +RTYPE get_pointer_5 (vector TYPE *p) { - return vec_extract (*p, 5); + return (RTYPE) vec_extract (*p, 5); } -TYPE +RTYPE get_pointer_6 (vector TYPE *p) { - return vec_extract (*p, 6); + return (RTYPE) vec_extract (*p, 6); } -TYPE +RTYPE get_pointer_7 (vector TYPE *p) { - return vec_extract (*p, 7); + return (RTYPE) vec_extract (*p, 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_pointer_8 (vector TYPE *p) { - return vec_extract (*p, 8); + return (RTYPE) vec_extract (*p, 8); } -TYPE +RTYPE get_pointer_9 (vector TYPE *p) { - return vec_extract (*p, 9); + return (RTYPE) vec_extract (*p, 9); } -TYPE +RTYPE get_pointer_10 (vector TYPE *p) { - return vec_extract (*p, 10); + return (RTYPE) vec_extract (*p, 10); } -TYPE +RTYPE get_pointer_11 (vector TYPE *p) { - return vec_extract (*p, 11); + return (RTYPE) vec_extract (*p, 11); } -TYPE +RTYPE get_pointer_12 (vector TYPE *p) { - return vec_extract (*p, 12); + return (RTYPE) vec_extract (*p, 12); } -TYPE +RTYPE get_pointer_13 (vector TYPE *p) { - return vec_extract (*p, 13); + return (RTYPE) vec_extract (*p, 13); } -TYPE +RTYPE get_pointer_14 (vector TYPE *p) { - return vec_extract (*p, 14); + return (RTYPE) vec_extract (*p, 14); } -TYPE +RTYPE get_pointer_15 (vector TYPE *p) { - 
return vec_extract (*p, 15); + return (RTYPE) vec_extract (*p, 15); } #endif #endif #endif -typedef TYPE (*pointer_func_type) (vector TYPE *); +typedef RTYPE (*pointer_func_type) (vector TYPE *); static pointer_func_type get_pointer_const[] = { get_pointer_0, @@ -481,7 +522,10 @@ do_pointer (vector TYPE *p) size_t i; for (i = 0; i < sizeof (get_pointer_const) / sizeof (get_pointer_const[0]); i++) - check (get_pointer_n (p, i), (get_pointer_const[i]) (p)); + { + TRACE ("pointer", i); + check (get_pointer_n (p, i), (get_pointer_const[i]) (p)); + } } @@ -489,116 +533,116 @@ do_pointer (vector TYPE *p) operation. This is to make sure that if the compiler optimizes vec_extract from memory to be a scalar load, the address is correctly adjusted. */ -TYPE +RTYPE get_indexed_n (vector TYPE *p, size_t x, ssize_t n) { - return vec_extract (p[x], n); + return (RTYPE) vec_extract (p[x], n); } -TYPE +RTYPE get_indexed_0 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 0); + return (RTYPE) vec_extract (p[x], 0); } -TYPE +RTYPE get_indexed_1 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 1); + return (RTYPE) vec_extract (p[x], 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_indexed_2 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 2); + return (RTYPE) vec_extract (p[x], 2); } -TYPE +RTYPE get_indexed_3 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 3); + return (RTYPE) vec_extract (p[x], 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_indexed_4 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 4); + return (RTYPE) vec_extract (p[x], 4); } -static TYPE +RTYPE get_indexed_5 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 5); + return (RTYPE) vec_extract (p[x], 5); } -TYPE +RTYPE get_indexed_6 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 6); + return (RTYPE) vec_extract (p[x], 6); } -TYPE +RTYPE get_indexed_7 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 7); + return (RTYPE) vec_extract (p[x], 7); } #if ELEMENTS >= 16 
-TYPE +RTYPE get_indexed_8 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 8); + return (RTYPE) vec_extract (p[x], 8); } -TYPE +RTYPE get_indexed_9 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 9); + return (RTYPE) vec_extract (p[x], 9); } -TYPE +RTYPE get_indexed_10 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 10); + return (RTYPE) vec_extract (p[x], 10); } -TYPE +RTYPE get_indexed_11 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 11); + return (RTYPE) vec_extract (p[x], 11); } -TYPE +RTYPE get_indexed_12 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 12); + return (RTYPE) vec_extract (p[x], 12); } -TYPE +RTYPE get_indexed_13 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 13); + return (RTYPE) vec_extract (p[x], 13); } -TYPE +RTYPE get_indexed_14 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 14); + return (RTYPE) vec_extract (p[x], 14); } -TYPE +RTYPE get_indexed_15 (vector TYPE *p, size_t x) { - return vec_extract (p[x], 15); + return (RTYPE) vec_extract (p[x], 15); } #endif #endif #endif -typedef TYPE (*indexed_func_type) (vector TYPE *, size_t); +typedef RTYPE (*indexed_func_type) (vector TYPE *, size_t); static indexed_func_type get_indexed_const[] = { get_indexed_0, @@ -633,7 +677,10 @@ do_indexed (vector TYPE *p, size_t x) size_t i; for (i = 0; i < sizeof (get_indexed_const) / sizeof (get_indexed_const[0]); i++) - check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x)); + { + TRACE ("indexed", i); + check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x)); + } } @@ -641,116 +688,116 @@ do_indexed (vector TYPE *p, size_t x) with a pointer and a constant offset. This will occur in ISA 3.0 which added d-form memory addressing for vectors. 
*/ -TYPE +RTYPE get_ptr_plus1_n (vector TYPE *p, ssize_t n) { - return vec_extract (p[1], n); + return (RTYPE) vec_extract (p[1], n); } -TYPE +RTYPE get_ptr_plus1_0 (vector TYPE *p) { - return vec_extract (p[1], 0); + return (RTYPE) vec_extract (p[1], 0); } -TYPE +RTYPE get_ptr_plus1_1 (vector TYPE *p) { - return vec_extract (p[1], 1); + return (RTYPE) vec_extract (p[1], 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_ptr_plus1_2 (vector TYPE *p) { - return vec_extract (p[1], 2); + return (RTYPE) vec_extract (p[1], 2); } -TYPE +RTYPE get_ptr_plus1_3 (vector TYPE *p) { - return vec_extract (p[1], 3); + return (RTYPE) vec_extract (p[1], 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_ptr_plus1_4 (vector TYPE *p) { - return vec_extract (p[1], 4); + return (RTYPE) vec_extract (p[1], 4); } -static TYPE +RTYPE get_ptr_plus1_5 (vector TYPE *p) { - return vec_extract (p[1], 5); + return (RTYPE) vec_extract (p[1], 5); } -TYPE +RTYPE get_ptr_plus1_6 (vector TYPE *p) { - return vec_extract (p[1], 6); + return (RTYPE) vec_extract (p[1], 6); } -TYPE +RTYPE get_ptr_plus1_7 (vector TYPE *p) { - return vec_extract (p[1], 7); + return (RTYPE) vec_extract (p[1], 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_ptr_plus1_8 (vector TYPE *p) { - return vec_extract (p[1], 8); + return (RTYPE) vec_extract (p[1], 8); } -TYPE +RTYPE get_ptr_plus1_9 (vector TYPE *p) { - return vec_extract (p[1], 9); + return (RTYPE) vec_extract (p[1], 9); } -TYPE +RTYPE get_ptr_plus1_10 (vector TYPE *p) { - return vec_extract (p[1], 10); + return (RTYPE) vec_extract (p[1], 10); } -TYPE +RTYPE get_ptr_plus1_11 (vector TYPE *p) { - return vec_extract (p[1], 11); + return (RTYPE) vec_extract (p[1], 11); } -TYPE +RTYPE get_ptr_plus1_12 (vector TYPE *p) { - return vec_extract (p[1], 12); + return (RTYPE) vec_extract (p[1], 12); } -TYPE +RTYPE get_ptr_plus1_13 (vector TYPE *p) { - return vec_extract (p[1], 13); + return (RTYPE) vec_extract (p[1], 13); } -TYPE +RTYPE get_ptr_plus1_14 (vector TYPE *p) { - return vec_extract (p[1], 14); 
+ return (RTYPE) vec_extract (p[1], 14); } -TYPE +RTYPE get_ptr_plus1_15 (vector TYPE *p) { - return vec_extract (p[1], 15); + return (RTYPE) vec_extract (p[1], 15); } #endif #endif #endif -typedef TYPE (*pointer_func_type) (vector TYPE *); +typedef RTYPE (*pointer_func_type) (vector TYPE *); static pointer_func_type get_ptr_plus1_const[] = { get_ptr_plus1_0, @@ -785,7 +832,10 @@ do_ptr_plus1 (vector TYPE *p) size_t i; for (i = 0; i < sizeof (get_ptr_plus1_const) / sizeof (get_ptr_plus1_const[0]); i++) - check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p)); + { + TRACE ("ptr_plus1", i); + check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p)); + } } @@ -793,116 +843,116 @@ do_ptr_plus1 (vector TYPE *p) static vector TYPE s; -TYPE +RTYPE get_static_n (ssize_t n) { - return vec_extract (s, n); + return (RTYPE) vec_extract (s, n); } -TYPE +RTYPE get_static_0 (void) { - return vec_extract (s, 0); + return (RTYPE) vec_extract (s, 0); } -TYPE +RTYPE get_static_1 (void) { - return vec_extract (s, 1); + return (RTYPE) vec_extract (s, 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_static_2 (void) { - return vec_extract (s, 2); + return (RTYPE) vec_extract (s, 2); } -TYPE +RTYPE get_static_3 (void) { - return vec_extract (s, 3); + return (RTYPE) vec_extract (s, 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_static_4 (void) { - return vec_extract (s, 4); + return (RTYPE) vec_extract (s, 4); } -TYPE +RTYPE get_static_5 (void) { - return vec_extract (s, 5); + return (RTYPE) vec_extract (s, 5); } -TYPE +RTYPE get_static_6 (void) { - return vec_extract (s, 6); + return (RTYPE) vec_extract (s, 6); } -TYPE +RTYPE get_static_7 (void) { - return vec_extract (s, 7); + return (RTYPE) vec_extract (s, 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_static_8 (void) { - return vec_extract (s, 8); + return (RTYPE) vec_extract (s, 8); } -TYPE +RTYPE get_static_9 (void) { - return vec_extract (s, 9); + return (RTYPE) vec_extract (s, 9); } -TYPE +RTYPE get_static_10 (void) { - return vec_extract 
(s, 10); + return (RTYPE) vec_extract (s, 10); } -TYPE +RTYPE get_static_11 (void) { - return vec_extract (s, 11); + return (RTYPE) vec_extract (s, 11); } -TYPE +RTYPE get_static_12 (void) { - return vec_extract (s, 12); + return (RTYPE) vec_extract (s, 12); } -TYPE +RTYPE get_static_13 (void) { - return vec_extract (s, 13); + return (RTYPE) vec_extract (s, 13); } -TYPE +RTYPE get_static_14 (void) { - return vec_extract (s, 14); + return (RTYPE) vec_extract (s, 14); } -TYPE +RTYPE get_static_15 (void) { - return vec_extract (s, 15); + return (RTYPE) vec_extract (s, 15); } #endif #endif #endif -typedef TYPE (*static_func_type) (void); +typedef RTYPE (*static_func_type) (void); static static_func_type get_static_const[] = { get_static_0, @@ -937,7 +987,10 @@ do_static (void) size_t i; for (i = 0; i < sizeof (get_static_const) / sizeof (get_static_const[0]); i++) - check (get_static_n (i), (get_static_const[i]) ()); + { + TRACE ("static", i); + check (get_static_n (i), (get_static_const[i]) ()); + } } @@ -945,116 +998,116 @@ do_static (void) vector TYPE g; -TYPE +RTYPE get_global_n (ssize_t n) { - return vec_extract (g, n); + return (RTYPE) vec_extract (g, n); } -TYPE +RTYPE get_global_0 (void) { - return vec_extract (g, 0); + return (RTYPE) vec_extract (g, 0); } -TYPE +RTYPE get_global_1 (void) { - return vec_extract (g, 1); + return (RTYPE) vec_extract (g, 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_global_2 (void) { - return vec_extract (g, 2); + return (RTYPE) vec_extract (g, 2); } -TYPE +RTYPE get_global_3 (void) { - return vec_extract (g, 3); + return (RTYPE) vec_extract (g, 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_global_4 (void) { - return vec_extract (g, 4); + return (RTYPE) vec_extract (g, 4); } -TYPE +RTYPE get_global_5 (void) { - return vec_extract (g, 5); + return (RTYPE) vec_extract (g, 5); } -TYPE +RTYPE get_global_6 (void) { - return vec_extract (g, 6); + return (RTYPE) vec_extract (g, 6); } -TYPE +RTYPE get_global_7 (void) { - return vec_extract (g, 7); 
+ return (RTYPE) vec_extract (g, 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_global_8 (void) { - return vec_extract (g, 8); + return (RTYPE) vec_extract (g, 8); } -TYPE +RTYPE get_global_9 (void) { - return vec_extract (g, 9); + return (RTYPE) vec_extract (g, 9); } -TYPE +RTYPE get_global_10 (void) { - return vec_extract (g, 10); + return (RTYPE) vec_extract (g, 10); } -TYPE +RTYPE get_global_11 (void) { - return vec_extract (g, 11); + return (RTYPE) vec_extract (g, 11); } -TYPE +RTYPE get_global_12 (void) { - return vec_extract (g, 12); + return (RTYPE) vec_extract (g, 12); } -TYPE +RTYPE get_global_13 (void) { - return vec_extract (g, 13); + return (RTYPE) vec_extract (g, 13); } -TYPE +RTYPE get_global_14 (void) { - return vec_extract (g, 14); + return (RTYPE) vec_extract (g, 14); } -TYPE +RTYPE get_global_15 (void) { - return vec_extract (g, 15); + return (RTYPE) vec_extract (g, 15); } #endif #endif #endif -typedef TYPE (*global_func_type) (void); +typedef RTYPE (*global_func_type) (void); static global_func_type get_global_const[] = { get_global_0, @@ -1089,7 +1142,10 @@ do_global (void) size_t i; for (i = 0; i < sizeof (get_global_const) / sizeof (get_global_const[0]); i++) - check (get_global_n (i), (get_global_const[i]) ()); + { + TRACE ("global", i); + check (get_global_n (i), (get_global_const[i]) ()); + } } Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 243590) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c (.../gcc/testsuite/gcc.target/powerpc) (working copy) @@ -3,6 +3,8 @@ /* { dg-options "-O2 -mvsx" } */ #define TYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) #define ELEMENTS 2 #define INITIAL { 10.0, -20.0 } Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c 
=================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 243590) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c (.../gcc/testsuite/gcc.target/powerpc) (working copy) @@ -3,6 +3,8 @@ /* { dg-options "-O2 -mvsx" } */ #define TYPE float +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) #define ELEMENTS 4 #define INITIAL { 10.0f, -20.0f, 30.0f, -40.0f } Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE int +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 4 +#define INITIAL { 10, -20, 30, -40 } + +#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned int +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 4 +#define INITIAL { 1, 2, 0xff03, 0xff04 } + 
+#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE short +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 8 +#define INITIAL { 10, -20, 30, -40, 50, -60, 70, 80 } + +#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned short +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 8 +#define INITIAL { 1, 2, 3, 4, 0xf1, 0xf2, 0xf3, 0xf4 } + +#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,14 @@ +/* { dg-do run { target 
{ powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE signed char +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 16 +#define INITIAL \ + { 10, -20, 30, -40, 50, -60, 70, -80, \ + 90, -100, 110, -120, 30, -40, 50, -60 } + +#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,13 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned char +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 16 +#define INITIAL \ + { 1, 2, 3, 4, 5, 6, 7, 8, 240, 241, 242, 243, 244, 245, 246, 247 } + +#include "vec-extract.h" Index: gcc/testsuite/gcc.target/powerpc/p9-extract-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-extract-1.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 243590) +++ gcc/testsuite/gcc.target/powerpc/p9-extract-1.c (.../gcc/testsuite/gcc.target/powerpc) (working copy) @@ -3,24 +3,107 @@ /* { dg-require-effective-target powerpc_p9vector_ok } */ /* { dg-options "-mcpu=power9 -O2" } */ +/* Test to make sure VEXTU{B,H,W}{L,R}X is generated for various vector extract + operations for ISA 3.0 (-mcpu=power9). In addition, make sure that neither + of the old methods of doing vector extracts are done either by + explicit stores to the stack or by using direct move instructions.
*/ + #include <altivec.h> -int extract_int_0 (vector int a) { return vec_extract (a, 0); } -int extract_int_3 (vector int a) { return vec_extract (a, 3); } +int +extract_int_0 (vector int a) +{ + int b = vec_extract (a, 0); + return b; +} + +int +extract_int_3 (vector int a) +{ + int b = vec_extract (a, 3); + return b; +} + +unsigned int +extract_uint_0 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 0); + return b; +} + +unsigned int +extract_uint_3 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 3); + return b; +} + +short +extract_short_0 (vector short a) +{ + short b = vec_extract (a, 0); + return b; +} + +short +extract_short_7 (vector short a) +{ + short b = vec_extract (a, 7); + return b; +} + +unsigned short +extract_ushort_0 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 0); + return b; +} + +unsigned short +extract_ushort_7 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 7); + return b; +} + +signed char +extract_schar_0 (vector signed char a) +{ + signed char b = vec_extract (a, 0); + return b; +} + +signed char +extract_schar_15 (vector signed char a) +{ + signed char b = vec_extract (a, 15); + return b; +} -int extract_short_0 (vector short a) { return vec_extract (a, 0); } -int extract_short_3 (vector short a) { return vec_extract (a, 7); } +unsigned char +extract_uchar_0 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 0); + return b; +} -int extract_schar_0 (vector signed char a) { return vec_extract (a, 0); } -int extract_schar_3 (vector signed char a) { return vec_extract (a, 15); } +unsigned char +extract_uchar_15 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 15); + return b; +} -/* { dg-final { scan-assembler "vextractub" } } */ -/* { dg-final { scan-assembler "vextractuh" } } */ -/* { dg-final { scan-assembler "xxextractuw" } } */ -/* { dg-final { scan-assembler "mfvsr" } } */ -/* { dg-final { scan-assembler-not "stxvd2x" } } */ -/* { dg-final {
scan-assembler-not "stxv" } } */ -/* { dg-final { scan-assembler-not "lwa" } } */ -/* { dg-final { scan-assembler-not "lwz" } } */ -/* { dg-final { scan-assembler-not "lha" } } */ -/* { dg-final { scan-assembler-not "lhz" } } */ +/* { dg-final { scan-assembler "vextub\[lr\]x " } } */ +/* { dg-final { scan-assembler "vextuh\[lr\]x " } } */ +/* { dg-final { scan-assembler "vextuw\[lr\]x " } } */ +/* { dg-final { scan-assembler "extsb " } } */ +/* { dg-final { scan-assembler "extsh " } } */ +/* { dg-final { scan-assembler "extsw " } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsr" } } */ +/* { dg-final { scan-assembler-not "stxvd2x " } } */ +/* { dg-final { scan-assembler-not "stxv " } } */ +/* { dg-final { scan-assembler-not "lwa " } } */ +/* { dg-final { scan-assembler-not "lwz " } } */ +/* { dg-final { scan-assembler-not "lha " } } */ +/* { dg-final { scan-assembler-not "lhz " } } */ Index: gcc/testsuite/gcc.target/powerpc/p9-extract-3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-extract-3.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-extract-3.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243608) @@ -0,0 +1,108 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +/* Test that under ISA 3.0 (-mcpu=power9), the compiler optimizes conversion to + double after a vec_extract to use the VEXTRACTU{B,H} or XXEXTRACTUW + instructions (which leave the result in a vector register), and not the + VEXTU{B,H,W}{L,R}X instructions (which need a direct move to do the floating + point conversion).
*/ + +#include <altivec.h> + +double +fpcvt_int_0 (vector int a) +{ + int b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_int_3 (vector int a) +{ + int b = vec_extract (a, 3); + return (double)b; +} + +double +fpcvt_uint_0 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_uint_3 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 3); + return (double)b; +} + +double +fpcvt_short_0 (vector short a) +{ + short b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_short_7 (vector short a) +{ + short b = vec_extract (a, 7); + return (double)b; +} + +double +fpcvt_ushort_0 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_ushort_7 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 7); + return (double)b; +} + +double +fpcvt_schar_0 (vector signed char a) +{ + signed char b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_schar_15 (vector signed char a) +{ + signed char b = vec_extract (a, 15); + return (double)b; +} + +double +fpcvt_uchar_0 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_uchar_15 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 15); + return (double)b; +} + +/* { dg-final { scan-assembler "vextractu\[bh\] " } } */ +/* { dg-final { scan-assembler "vexts\[bh\]2d " } } */ +/* { dg-final { scan-assembler "vspltw " } } */ +/* { dg-final { scan-assembler "xscvsxddp " } } */ +/* { dg-final { scan-assembler "xvcvsxwdp " } } */ +/* { dg-final { scan-assembler "xvcvuxwdp " } } */ +/* { dg-final { scan-assembler-not "exts\[bhw\] " } } */ +/* { dg-final { scan-assembler-not "stxv" } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsrd " } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsrw\[az\] " } } */ +/* { dg-final { scan-assembler-not "l\[hw\]\[az\] " } } */