On Tue, Nov 08, 2011 at 10:03:23AM +0200, Ira Rosen wrote:
> The second option would be nicer. ...
Thanks. Here is an updated patch, will bootstrap/regtest it now. Ok for trunk if it passes? 2011-11-08 Jakub Jelinek <ja...@redhat.com> * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument. Handle vectorization of SLP calls. (vect_analyze_stmt): Adjust caller, add call to it for SLP too. (vect_transform_stmt): Adjust vectorizable_call caller, remove assertion. * tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start with op_idx 3. (vect_build_slp_tree): Allow CALL_EXPR. * lib/target-supports.exp (check_effective_target_vect_call_sqrtf, check_effective_target_vect_call_copysignf, check_effective_target_vect_call_lrint): New procedures. * gcc.dg/vect/vect.exp: Run fast-math-bb-slp* tests using $VECT_SLP_CFLAGS with -ffast-math. * gcc.dg/vect/fast-math-vect-call-1.c: New test. * gcc.dg/vect/fast-math-vect-call-2.c: New test. * gcc.dg/vect/fast-math-bb-slp-call-1.c: New test. * gcc.dg/vect/fast-math-bb-slp-call-2.c: New test. --- gcc/tree-vect-slp.c.jj 2011-11-07 20:32:03.000000000 +0100 +++ gcc/tree-vect-slp.c 2011-11-08 09:28:12.000000000 +0100 @@ -202,7 +202,10 @@ vect_get_and_check_slp_defs (loop_vec_in loop = LOOP_VINFO_LOOP (loop_vinfo); if (is_gimple_call (stmt)) - number_of_oprnds = gimple_call_num_args (stmt); + { + number_of_oprnds = gimple_call_num_args (stmt); + op_idx = 3; + } else if (is_gimple_assign (stmt)) { number_of_oprnds = gimple_num_ops (stmt) - 1; @@ -558,7 +561,25 @@ vect_build_slp_tree (loop_vec_info loop_ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); if (is_gimple_call (stmt)) - rhs_code = CALL_EXPR; + { + rhs_code = CALL_EXPR; + if (gimple_call_internal_p (stmt) + || gimple_call_tail_p (stmt) + || gimple_call_noreturn_p (stmt) + || !gimple_call_nothrow_p (stmt) + || gimple_call_chain (stmt)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: unsupported call type "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + vect_free_oprnd_info (&oprnds_info, 
true); + return false; + } + } else rhs_code = gimple_assign_rhs_code (stmt); @@ -653,6 +674,27 @@ vect_build_slp_tree (loop_vec_info loop_ vect_free_oprnd_info (&oprnds_info, true); return false; } + + if (rhs_code == CALL_EXPR) + { + gimple first_stmt = VEC_index (gimple, stmts, 0); + if (gimple_call_num_args (stmt) != nops + || !operand_equal_p (gimple_call_fn (first_stmt), + gimple_call_fn (stmt), 0) + || gimple_call_fntype (first_stmt) + != gimple_call_fntype (stmt)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: different calls in "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + vect_free_oprnd_info (&oprnds_info, true); + return false; + } + } } /* Strided store or load. */ @@ -786,7 +828,8 @@ vect_build_slp_tree (loop_vec_info loop_ /* Not memory operation. */ if (TREE_CODE_CLASS (rhs_code) != tcc_binary && TREE_CODE_CLASS (rhs_code) != tcc_unary - && rhs_code != COND_EXPR) + && rhs_code != COND_EXPR + && rhs_code != CALL_EXPR) { if (vect_print_dump_info (REPORT_SLP)) { --- gcc/tree-vect-stmts.c.jj 2011-11-07 20:32:09.000000000 +0100 +++ gcc/tree-vect-stmts.c 2011-11-08 09:28:55.000000000 +0100 @@ -1521,7 +1521,8 @@ vectorizable_function (gimple call, tree Return FALSE if not a vectorizable STMT, TRUE otherwise. */ static bool -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, + slp_tree slp_node) { tree vec_dest; tree scalar_dest; @@ -1532,6 +1533,7 @@ vectorizable_call (gimple stmt, gimple_s int nunits_in; int nunits_out; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); tree fndecl, new_temp, def, rhs_type; gimple def_stmt; enum vect_def_type dt[3] @@ -1543,19 +1545,12 @@ vectorizable_call (gimple stmt, gimple_s size_t i, nargs; tree lhs; - /* FORNOW: unsupported in basic block SLP. 
*/ - gcc_assert (loop_vinfo); - - if (!STMT_VINFO_RELEVANT_P (stmt_info)) + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) return false; - /* FORNOW: SLP not supported. */ - if (STMT_SLP_TYPE (stmt_info)) - return false; - /* Is STMT a vectorizable call? */ if (!is_gimple_call (stmt)) return false; @@ -1596,7 +1591,7 @@ vectorizable_call (gimple stmt, gimple_s if (!rhs_type) rhs_type = TREE_TYPE (op); - if (!vect_is_simple_use_1 (op, loop_vinfo, NULL, + if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[i], &opvectype)) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -1658,7 +1653,9 @@ vectorizable_call (gimple stmt, gimple_s gcc_assert (!gimple_vuse (stmt)); - if (modifier == NARROW) + if (slp_node || PURE_SLP_STMT (stmt_info)) + ncopies = 1; + else if (modifier == NARROW) ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; @@ -1697,6 +1694,50 @@ vectorizable_call (gimple stmt, gimple_s else VEC_truncate (tree, vargs, 0); + if (slp_node) + { + VEC (slp_void_p, heap) *vec_defs + = VEC_alloc (slp_void_p, heap, nargs); + VEC (tree, heap) *vec_oprnds0; + + for (i = 0; i < nargs; i++) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vec_oprnds0 + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); + + /* Arguments are ready. Create the new vector stmt. 
*/ + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) + { + size_t k; + for (k = 0; k < nargs; k++) + { + VEC (tree, heap) *vec_oprndsk + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, k); + VEC_replace (tree, vargs, k, + VEC_index (tree, vec_oprndsk, i)); + } + new_stmt = gimple_build_call_vec (fndecl, vargs); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), + new_stmt); + } + + for (i = 0; i < nargs; i++) + { + VEC (tree, heap) *vec_oprndsi + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, i); + VEC_free (tree, heap, vec_oprndsi); + } + VEC_free (slp_void_p, heap, vec_defs); + continue; + } + for (i = 0; i < nargs; i++) { op = gimple_call_arg (stmt, i); @@ -1739,6 +1780,54 @@ vectorizable_call (gimple stmt, gimple_s else VEC_truncate (tree, vargs, 0); + if (slp_node) + { + VEC (slp_void_p, heap) *vec_defs + = VEC_alloc (slp_void_p, heap, nargs); + VEC (tree, heap) *vec_oprnds0; + + for (i = 0; i < nargs; i++) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vec_oprnds0 + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); + + /* Arguments are ready. Create the new vector stmt. 
*/ + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0); + i += 2) + { + size_t k; + VEC_truncate (tree, vargs, 0); + for (k = 0; k < nargs; k++) + { + VEC (tree, heap) *vec_oprndsk + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, k); + VEC_quick_push (tree, vargs, + VEC_index (tree, vec_oprndsk, i)); + VEC_quick_push (tree, vargs, + VEC_index (tree, vec_oprndsk, i + 1)); + } + new_stmt = gimple_build_call_vec (fndecl, vargs); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), + new_stmt); + } + + for (i = 0; i < nargs; i++) + { + VEC (tree, heap) *vec_oprndsi + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, i); + VEC_free (tree, heap, vec_oprndsi); + } + VEC_free (slp_void_p, heap, vec_defs); + continue; + } + for (i = 0; i < nargs; i++) { op = gimple_call_arg (stmt, i); @@ -1804,7 +1893,8 @@ vectorizable_call (gimple stmt, gimple_s lhs = gimple_call_lhs (stmt); new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); set_vinfo_for_stmt (new_stmt, stmt_info); - set_vinfo_for_stmt (stmt, NULL); + if (!slp_node) + set_vinfo_for_stmt (stmt, NULL); STMT_VINFO_STMT (stmt_info) = new_stmt; gsi_replace (gsi, new_stmt, false); SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; @@ -5265,7 +5355,7 @@ vect_analyze_stmt (gimple stmt, bool *ne || vectorizable_operation (stmt, NULL, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL, NULL) || vectorizable_load (stmt, NULL, NULL, NULL, NULL) - || vectorizable_call (stmt, NULL, NULL) + || vectorizable_call (stmt, NULL, NULL, NULL) || vectorizable_store (stmt, NULL, NULL, NULL) || vectorizable_reduction (stmt, NULL, NULL, NULL) || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); @@ -5277,6 +5367,7 @@ vect_analyze_stmt (gimple stmt, bool *ne || vectorizable_operation (stmt, NULL, 
NULL, node) || vectorizable_assignment (stmt, NULL, NULL, node) || vectorizable_load (stmt, NULL, NULL, node, NULL) + || vectorizable_call (stmt, NULL, NULL, node) || vectorizable_store (stmt, NULL, NULL, node) || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); } @@ -5391,8 +5482,7 @@ vect_transform_stmt (gimple stmt, gimple break; case call_vec_info_type: - gcc_assert (!slp_node); - done = vectorizable_call (stmt, gsi, &vec_stmt); + done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node); stmt = gsi_stmt (*gsi); break; --- gcc/testsuite/lib/target-supports.exp.jj 2011-11-08 09:26:58.000000000 +0100 +++ gcc/testsuite/lib/target-supports.exp 2011-11-08 10:15:38.000000000 +0100 @@ -3520,6 +3520,58 @@ proc check_effective_target_vect64 { } { return $et_vect64_saved } +# Return 1 if the target supports vector copysignf calls. + +proc check_effective_target_vect_call_copysignf { } { + global et_vect_call_copysignf_saved + + if [info exists et_vect_call_copysignf_saved] { + verbose "check_effective_target_vect_call_copysignf: using cached result" 2 + } else { + set et_vect_call_copysignf_saved 0 + if { [istarget i?86-*-*] + || [istarget x86_64-*-*] + || [istarget powerpc*-*-*] } { + set et_vect_call_copysignf_saved 1 + } + } + + verbose "check_effective_target_vect_call_copysignf: returning $et_vect_call_copysignf_saved" 2 + return $et_vect_call_copysignf_saved +} + +# Return 1 if the target supports vector sqrtf calls. 
+ +proc check_effective_target_vect_call_sqrtf { } { + global et_vect_call_sqrtf_saved + + if [info exists et_vect_call_sqrtf_saved] { + verbose "check_effective_target_vect_call_sqrtf: using cached result" 2 + } else { + set et_vect_call_sqrtf_saved 0 + if { [istarget i?86-*-*] + || [istarget x86_64-*-*] + || ([istarget powerpc*-*-*] && [check_vsx_hw_available]) } { + set et_vect_call_sqrtf_saved 1 + } + } + + verbose "check_effective_target_vect_call_sqrtf: returning $et_vect_call_sqrtf_saved" 2 + return $et_vect_call_sqrtf_saved +} + +# Return 1 if the target supports vector lrint calls. + +proc check_effective_target_vect_call_lrint { } { + set et_vect_call_lrint 0 + if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) && [check_effective_target_ilp32] } { + set et_vect_call_lrint 1 + } + + verbose "check_effective_target_vect_call_lrint: returning $et_vect_call_lrint" 2 + return $et_vect_call_lrint +} + # Return 1 if the target supports section-anchors proc check_effective_target_section_anchors { } { --- gcc/testsuite/gcc.dg/vect/vect.exp.jj 2011-10-24 12:21:08.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect.exp 2011-11-08 10:09:27.000000000 +0100 @@ -104,9 +104,15 @@ dg-runtest [lsort [glob -nocomplain $src # -ffast-math tests set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS lappend DEFAULT_VECTCFLAGS "-ffast-math" -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-*.\[cS\]]] \ +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-\[ipsv\]*.\[cS\]]] \ "" $DEFAULT_VECTCFLAGS +# -ffast-math SLP tests +set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS +lappend VECT_SLP_CFLAGS "-ffast-math" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]] \ + "" $VECT_SLP_CFLAGS + # -fno-fast-math tests set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS lappend DEFAULT_VECTCFLAGS "-fno-fast-math" --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-08 09:28:12.000000000 +0100 +++ 
gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-08 09:57:19.000000000 +0100 @@ -0,0 +1,81 @@ +#include "tree-vect.h" + +extern float copysignf (float, float); +extern float sqrtf (float); +extern float fabsf (float); +extern void abort (void); +float a[64], b[64], c[64], d[64]; + +__attribute__((noinline, noclone)) void +f1 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]); + a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]); + a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]); + a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]); + } +} + +__attribute__((noinline, noclone)) void +f2 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]); + a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]); + } +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i = 0; i < 64; i++) + { + asm (""); + b[i] = (i & 1) ? -4 * i : 4 * i; + c[i] = (i & 2) ? -8 * i : 8 * i; + d[i] = i * i; + } + f1 (16); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= 0.0001f) + abort (); + else + a[i] = 131.25; + f2 (16); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= 0.0001f) + abort (); + else + a[i] = 131.25; + f3 (); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? 
-4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj 2011-11-08 09:28:12.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c 2011-11-08 10:03:37.000000000 +0100 @@ -0,0 +1,128 @@ +#include "tree-vect.h" + +extern long int lrint (double); +extern void abort (void); +long int a[64]; +double b[64]; + +__attribute__((noinline, noclone)) void +f1 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = lrint (b[4 * i + 0]) + 1; + a[4 * i + 1] = lrint (b[4 * i + 1]) + 2; + a[4 * i + 2] = lrint (b[4 * i + 2]) + 3; + a[4 * i + 3] = lrint (b[4 * i + 3]) + 4; + } +} + +__attribute__((noinline, noclone)) void +f2 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + a[2 * i + 0] = lrint (b[2 * i + 0]) + 1; + a[2 * i + 1] = lrint (b[2 * i + 1]) + 2; + } +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = lrint (b[i]) + 1; +} + +__attribute__((noinline, noclone)) void +f4 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = lrint (b[4 * i + 0]); + a[4 * i + 1] = lrint (b[4 * i + 1]); + a[4 * i + 2] = lrint (b[4 * i + 2]); + a[4 * i + 3] = lrint (b[4 * i + 3]); + } +} + +__attribute__((noinline, noclone)) void +f5 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + a[2 * i + 0] = lrint (b[2 * i + 0]); + a[2 * i + 1] = lrint (b[2 * i + 1]); + } +} + +__attribute__((noinline, noclone)) void +f6 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = lrint (b[i]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i 
= 0; i < 64; i++) + { + asm (""); + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; + } + f1 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3)) + abort (); + else + a[i] = 131.25; + f2 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1)) + abort (); + else + a[i] = 131.25; + f3 (); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1) + abort (); + else + a[i] = 131.25; + f4 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + else + a[i] = 131.25; + f5 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + else + a[i] = 131.25; + f6 (); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" { target vect_call_lrint } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_call_lrint } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c.jj 2011-11-08 09:46:00.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c 2011-11-08 09:49:49.000000000 +0100 @@ -0,0 +1,49 @@ +#include "tree-vect.h" + +extern float copysignf (float, float); +extern float sqrtf (float); +extern float fabsf (float); +extern void abort (void); +float a[64], b[64], c[64], d[64]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]); + a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]); + a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]); + a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]); + a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]); + a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]); + a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]); + a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf 
(d[7]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i = 0; i < 8; i++) + { + asm (""); + b[i] = (i & 1) ? -4 * i : 4 * i; + c[i] = (i & 2) ? -8 * i : 8 * i; + d[i] = i * i; + } + f1 (); + for (i = 0; i < 8; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_call_copysignf && vect_call_sqrtf } } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c.jj 2011-11-08 09:46:04.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c 2011-11-08 10:11:20.000000000 +0100 @@ -0,0 +1,65 @@ +#include "tree-vect.h" + +extern long int lrint (double); +extern void abort (void); +long int a[64]; +double b[64]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + a[0] = lrint (b[0]) + 1; + a[1] = lrint (b[1]) + 2; + a[2] = lrint (b[2]) + 3; + a[3] = lrint (b[3]) + 4; + a[4] = lrint (b[4]) + 5; + a[5] = lrint (b[5]) + 6; + a[6] = lrint (b[6]) + 7; + a[7] = lrint (b[7]) + 8; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + a[0] = lrint (b[0]); + a[1] = lrint (b[1]); + a[2] = lrint (b[2]); + a[3] = lrint (b[3]); + a[4] = lrint (b[4]); + a[5] = lrint (b[5]); + a[6] = lrint (b[6]); + a[7] = lrint (b[7]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i = 0; i < 8; i++) + { + asm (""); + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; + } + f1 (); + for (i = 0; i < 8; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i) + abort (); + else + a[i] = 131.25; + f2 (); + for (i = 0; i < 8; i++) + if (a[i] != ((i & 1) ? 
-4 * i : 4 * i)) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 2 "slp" { target vect_call_lrint } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ Jakub