On Fri, 6 Nov 2015, Richard Biener wrote: > > The following patch makes the BB vectorizer not only handle BB heads > (until the first stmt with a data reference it cannot handle) but > arbitrary regions in a BB separated by such stmts. > > This improves the number of BB vectorizations from 469 to 556 > in a quick test on SPEC CPU 2006 with -Ofast on x86_64 and > 1x400.perlbench 1x410.bwaves 1x416.gamess 1x450.soplex 1x453.povray > 1x481.wrf failing both patched and unpatched (have to update my > config used for such experiments it seems ...) > > Bootstrapped and tested on x86_64-unknown-linux-gnu, aarch64 cross built. > > I'm currently re-testing for a cosmetic change I made when writing > the changelog. > > As I expected, there are some issues with compile time. Left > is unpatched and right is patched. > > '403.gcc': 00:00:54 (54) | '403.gcc': 00:00:55 (55) > '483.xalancbmk': 00:02:20 (140) | '483.xalancbmk': 00:02:24 (144) > '416.gamess': 00:02:36 (156) | '416.gamess': 00:02:37 (157) > '435.gromacs': 00:00:18 (18) | '435.gromacs': 00:00:19 (19) > '447.dealII': 00:01:31 (91) | '447.dealII': 00:01:33 (93) > '453.povray': 00:04:54 (294) | '453.povray': 00:08:54 (534) > '454.calculix': 00:00:34 (34) | '454.calculix': 00:00:52 (52) > '481.wrf': 00:01:57 (117) | '481.wrf': 00:01:59 (119) > > Other benchmarks are unchanged. I'm double-checking now that a follow-up > patch I have, which re-implements BB vectorization dependence checking, > fixes this (that's the only quadratic behavior I know of).
Fixes all but '453.povray': 00:04:54 (294) | '453.povray': 00:06:46 (406) it even improves compile-time on some: '464.h264ref': 00:00:26 (26) | '464.h264ref': 00:00:21 (21) it also increases the number of vectorized BBs to 722. Needs some work still though. Richard. > Richard. > > 2015-11-06 Richard Biener <rguent...@suse.de> > > * tree-vectorizer.h (struct _bb_vec_info): Add region_begin/end > members. > (vect_stmt_in_region_p): Declare. > * tree-vect-slp.c (new_bb_vec_info): Work on a region. > (destroy_bb_vec_info): Likewise. > (vect_bb_slp_scalar_cost): Use vect_stmt_in_region_p. > (vect_get_and_check_slp_defs): Likewise. > (vect_slp_analyze_bb_1): Refactor to make it work on sub-BBs. > (vect_slp_bb): Likewise. > * tree-vect-patterns.c (vect_same_loop_or_bb_p): Implement > in terms of vect_stmt_in_region_p. > (vect_pattern_recog): Iterate over the BB region. > * tree-vect-stmts.c (vect_is_simple_use): Use vect_stmt_in_region_p. > * tree-vectorizer.c (vect_stmt_in_region_p): New function. > (pass_slp_vectorize::execute): Initialize all stmt UIDs to -1. > > * config/i386/i386.c: Include gimple-iterator.h. > * config/aarch64/aarch64.c: Likewise. > > * gcc.dg/vect/bb-slp-38.c: New testcase. > > Index: gcc/tree-vectorizer.h > =================================================================== > *** gcc/tree-vectorizer.h.orig 2015-11-05 09:52:00.640227178 +0100 > --- gcc/tree-vectorizer.h 2015-11-05 13:20:58.385786476 +0100 > *************** nested_in_vect_loop_p (struct loop *loop > *** 390,395 **** > --- 390,397 ---- > typedef struct _bb_vec_info : public vec_info > { > basic_block bb; > + gimple_stmt_iterator region_begin; > + gimple_stmt_iterator region_end; > } *bb_vec_info; > > #define BB_VINFO_BB(B) (B)->bb > *************** void vect_pattern_recog (vec_info *); > *** 1085,1089 **** > --- 1087,1092 ---- > /* In tree-vectorizer.c. 
*/ > unsigned vectorize_loops (void); > void vect_destroy_datarefs (vec_info *); > + bool vect_stmt_in_region_p (vec_info *, gimple *); > > #endif /* GCC_TREE_VECTORIZER_H */ > Index: gcc/tree-vect-slp.c > =================================================================== > *** gcc/tree-vect-slp.c.orig 2015-11-05 09:52:00.640227178 +0100 > --- gcc/tree-vect-slp.c 2015-11-06 10:22:56.707880233 +0100 > *************** vect_get_and_check_slp_defs (vec_info *v > *** 209,215 **** > unsigned int i, number_of_oprnds; > gimple *def_stmt; > enum vect_def_type dt = vect_uninitialized_def; > - struct loop *loop = NULL; > bool pattern = false; > slp_oprnd_info oprnd_info; > int first_op_idx = 1; > --- 209,214 ---- > *************** vect_get_and_check_slp_defs (vec_info *v > *** 218,226 **** > bool first = stmt_num == 0; > bool second = stmt_num == 1; > > - if (is_a <loop_vec_info> (vinfo)) > - loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo)); > - > if (is_gimple_call (stmt)) > { > number_of_oprnds = gimple_call_num_args (stmt); > --- 217,222 ---- > *************** again: > *** 276,286 **** > from the pattern. Check that all the stmts of the node are in the > pattern. */ > if (def_stmt && gimple_bb (def_stmt) > ! && ((is_a <loop_vec_info> (vinfo) > ! && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) > ! || (is_a <bb_vec_info> (vinfo) > ! && gimple_bb (def_stmt) == as_a <bb_vec_info> (vinfo)->bb > ! && gimple_code (def_stmt) != GIMPLE_PHI)) > && vinfo_for_stmt (def_stmt) > && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) > && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) > --- 272,278 ---- > from the pattern. Check that all the stmts of the node are in the > pattern. */ > if (def_stmt && gimple_bb (def_stmt) > ! 
&& vect_stmt_in_region_p (vinfo, def_stmt) > && vinfo_for_stmt (def_stmt) > && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) > && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) > *************** vect_detect_hybrid_slp (loop_vec_info lo > *** 2076,2091 **** > stmt_vec_info structs for all the stmts in it. */ > > static bb_vec_info > ! new_bb_vec_info (basic_block bb) > { > bb_vec_info res = NULL; > gimple_stmt_iterator gsi; > > res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info)); > res->kind = vec_info::bb; > BB_VINFO_BB (res) = bb; > > ! for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > gimple_set_uid (stmt, 0); > --- 2068,2088 ---- > stmt_vec_info structs for all the stmts in it. */ > > static bb_vec_info > ! new_bb_vec_info (gimple_stmt_iterator region_begin, > ! gimple_stmt_iterator region_end) > { > + basic_block bb = gsi_bb (region_begin); > bb_vec_info res = NULL; > gimple_stmt_iterator gsi; > > res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info)); > res->kind = vec_info::bb; > BB_VINFO_BB (res) = bb; > + res->region_begin = region_begin; > + res->region_end = region_end; > > ! for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); > ! gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > gimple_set_uid (stmt, 0); > *************** destroy_bb_vec_info (bb_vec_info bb_vinf > *** 2118,2124 **** > > bb = BB_VINFO_BB (bb_vinfo); > > ! for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > { > gimple *stmt = gsi_stmt (si); > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > --- 2115,2122 ---- > > bb = BB_VINFO_BB (bb_vinfo); > > ! for (si = bb_vinfo->region_begin; > ! gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si)) > { > gimple *stmt = gsi_stmt (si); > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > *************** destroy_bb_vec_info (bb_vec_info bb_vinf > *** 2126,2131 **** > --- 2124,2132 ---- > if (stmt_info) > /* Free stmt_vec_info. 
*/ > free_stmt_vec_info (stmt); > + > + /* Reset region marker. */ > + gimple_set_uid (stmt, -1); > } > > vect_destroy_datarefs (bb_vinfo); > *************** vect_bb_slp_scalar_cost (basic_block bb, > *** 2247,2254 **** > gimple *use_stmt; > FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) > if (!is_gimple_debug (use_stmt) > ! && (gimple_code (use_stmt) == GIMPLE_PHI > ! || gimple_bb (use_stmt) != bb > || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))) > { > (*life)[i] = true; > --- 2248,2255 ---- > gimple *use_stmt; > FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) > if (!is_gimple_debug (use_stmt) > ! && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo, > ! use_stmt) > || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))) > { > (*life)[i] = true; > *************** vect_bb_vectorization_profitable_p (bb_v > *** 2327,2366 **** > /* Check if the basic block can be vectorized. */ > > static bb_vec_info > ! vect_slp_analyze_bb_1 (basic_block bb) > { > bb_vec_info bb_vinfo; > vec<slp_instance> slp_instances; > slp_instance instance; > int i; > int min_vf = 2; > - unsigned n_stmts = 0; > > ! bb_vinfo = new_bb_vec_info (bb); > if (!bb_vinfo) > return NULL; > > ! /* Gather all data references in the basic-block. */ > ! > ! for (gimple_stmt_iterator gsi = gsi_start_bb (bb); > ! !gsi_end_p (gsi); gsi_next (&gsi)) > ! { > ! gimple *stmt = gsi_stmt (gsi); > ! if (is_gimple_debug (stmt)) > ! continue; > ! ++n_stmts; > ! if (!find_data_references_in_stmt (NULL, stmt, > ! &BB_VINFO_DATAREFS (bb_vinfo))) > ! { > ! /* Mark the rest of the basic-block as unvectorizable. */ > ! for (; !gsi_end_p (gsi); gsi_next (&gsi)) > ! { > ! stmt = gsi_stmt (gsi); > ! STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false; > ! } > ! break; > ! } > ! } > > /* Analyze the data references. */ > > --- 2328,2358 ---- > /* Check if the basic block can be vectorized. */ > > static bb_vec_info > ! 
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, > ! gimple_stmt_iterator region_end, > ! vec<data_reference_p> datarefs, int n_stmts) > { > bb_vec_info bb_vinfo; > vec<slp_instance> slp_instances; > slp_instance instance; > int i; > int min_vf = 2; > > ! if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) > ! { > ! if (dump_enabled_p ()) > ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > ! "not vectorized: too many instructions in " > ! "basic block.\n"); > ! free_data_refs (datarefs); > ! return NULL; > ! } > ! > ! bb_vinfo = new_bb_vec_info (region_begin, region_end); > if (!bb_vinfo) > return NULL; > > ! BB_VINFO_DATAREFS (bb_vinfo) = datarefs; > > /* Analyze the data references. */ > > *************** vect_slp_analyze_bb_1 (basic_block bb) > *** 2438,2445 **** > } > > /* Mark all the statements that we do not want to vectorize. */ > ! for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo)); > ! !gsi_end_p (gsi); gsi_next (&gsi)) > { > stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi)); > if (STMT_SLP_TYPE (vinfo) != pure_slp) > --- 2430,2437 ---- > } > > /* Mark all the statements that we do not want to vectorize. */ > ! for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; > ! 
gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) > { > stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi)); > if (STMT_SLP_TYPE (vinfo) != pure_slp) > *************** bool > *** 2509,2585 **** > vect_slp_bb (basic_block bb) > { > bb_vec_info bb_vinfo; > - int insns = 0; > gimple_stmt_iterator gsi; > unsigned int vector_sizes; > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "===vect_slp_analyze_bb===\n"); > > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - gimple *stmt = gsi_stmt (gsi); > - if (!is_gimple_debug (stmt) > - && !gimple_nop_p (stmt) > - && gimple_code (stmt) != GIMPLE_LABEL) > - insns++; > - if (gimple_location (stmt) != UNKNOWN_LOCATION) > - vect_location = gimple_location (stmt); > - } > - > - if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "not vectorized: too many instructions in " > - "basic block.\n"); > - > - return false; > - } > - > /* Autodetect first vector size we try. */ > current_vector_size = 0; > vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); > > while (1) > { > ! bb_vinfo = vect_slp_analyze_bb_1 (bb); > ! if (bb_vinfo) > { > ! if (!dbg_cnt (vect_slp)) > ! { > ! destroy_bb_vec_info (bb_vinfo); > ! return false; > ! } > > if (dump_enabled_p ()) > ! dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n"); > > vect_schedule_slp (bb_vinfo); > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > ! "BASIC BLOCK VECTORIZED\n"); > > destroy_bb_vec_info (bb_vinfo); > > ! return true; > } > > ! destroy_bb_vec_info (bb_vinfo); > > vector_sizes &= ~current_vector_size; > ! if (vector_sizes == 0 > ! || current_vector_size == 0) > ! return false; > > ! /* Try the next biggest vector size. */ > ! current_vector_size = 1 << floor_log2 (vector_sizes); > ! if (dump_enabled_p ()) > ! dump_printf_loc (MSG_NOTE, vect_location, > ! 
"***** Re-trying analysis with " > ! "vector size %d\n", current_vector_size); > } > } > > > --- 2501,2605 ---- > vect_slp_bb (basic_block bb) > { > bb_vec_info bb_vinfo; > gimple_stmt_iterator gsi; > unsigned int vector_sizes; > + bool any_vectorized = false; > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "===vect_slp_analyze_bb===\n"); > > /* Autodetect first vector size we try. */ > current_vector_size = 0; > vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); > > + gsi = gsi_start_bb (bb); > + > while (1) > { > ! if (gsi_end_p (gsi)) > ! break; > ! > ! gimple_stmt_iterator region_begin = gsi; > ! vec<data_reference_p> datarefs = vNULL; > ! int insns = 0; > ! > ! for (; !gsi_end_p (gsi); gsi_next (&gsi)) > { > ! gimple *stmt = gsi_stmt (gsi); > ! if (is_gimple_debug (stmt)) > ! continue; > ! insns++; > ! > ! if (gimple_location (stmt) != UNKNOWN_LOCATION) > ! vect_location = gimple_location (stmt); > ! > ! if (!find_data_references_in_stmt (NULL, stmt, &datarefs)) > ! break; > ! } > ! > ! /* Skip leading unhandled stmts. */ > ! if (gsi_stmt (region_begin) == gsi_stmt (gsi)) > ! { > ! gsi_next (&gsi); > ! continue; > ! } > ! > ! gimple_stmt_iterator region_end = gsi; > > + bool vectorized = false; > + bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end, > + datarefs, insns); > + if (bb_vinfo > + && dbg_cnt (vect_slp)) > + { > if (dump_enabled_p ()) > ! dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); > > vect_schedule_slp (bb_vinfo); > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > ! "basic block part vectorized\n"); > > destroy_bb_vec_info (bb_vinfo); > > ! vectorized = true; > } > + else > + destroy_bb_vec_info (bb_vinfo); > > ! any_vectorized |= vectorized; > > vector_sizes &= ~current_vector_size; > ! if (vectorized > ! || vector_sizes == 0 > ! || current_vector_size == 0) > ! { > ! if (gsi_end_p (region_end)) > ! break; > ! > ! /* Skip the unhandled stmt. */ > ! 
gsi_next (&gsi); > ! > ! /* And reset vector sizes. */ > ! current_vector_size = 0; > ! vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); > ! } > ! else > ! { > ! /* Try the next biggest vector size. */ > ! current_vector_size = 1 << floor_log2 (vector_sizes); > ! if (dump_enabled_p ()) > ! dump_printf_loc (MSG_NOTE, vect_location, > ! "***** Re-trying analysis with " > ! "vector size %d\n", current_vector_size); > > ! /* Start over. */ > ! gsi = region_begin; > ! } > } > + > + return any_vectorized; > } > > > Index: gcc/tree-vect-patterns.c > =================================================================== > *** gcc/tree-vect-patterns.c.orig 2015-11-05 09:52:00.640227178 +0100 > --- gcc/tree-vect-patterns.c 2015-11-05 13:25:46.060011765 +0100 > *************** static bool > *** 107,133 **** > vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2) > { > stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1); > ! loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); > ! bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); > ! > ! if (!gimple_bb (stmt2)) > ! return false; > ! > ! if (loop_vinfo) > ! { > ! struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > ! if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2))) > ! return false; > ! } > ! else > ! { > ! if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo) > ! || gimple_code (stmt2) == GIMPLE_PHI) > ! return false; > ! } > ! > ! gcc_assert (vinfo_for_stmt (stmt2)); > ! return true; > } > > /* If the LHS of DEF_STMT has a single use, and that statement is > --- 107,113 ---- > vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2) > { > stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1); > ! 
return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2); > } > > /* If the LHS of DEF_STMT has a single use, and that statement is > *************** vect_pattern_recog (vec_info *vinfo) > *** 3611,3643 **** > loop = LOOP_VINFO_LOOP (loop_vinfo); > bbs = LOOP_VINFO_BBS (loop_vinfo); > nbbs = loop->num_nodes; > } > else > { > ! bbs = &as_a <bb_vec_info> (vinfo)->bb; > ! nbbs = 1; > ! } > ! > ! /* Scan through the loop stmts, applying the pattern recognition > ! functions starting at each stmt visited: */ > ! for (i = 0; i < nbbs; i++) > ! { > ! basic_block bb = bbs[i]; > ! for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > ! { > ! if (is_a <bb_vec_info> (vinfo) > ! && (stmt = gsi_stmt (si)) > && vinfo_for_stmt (stmt) > && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) > ! continue; > > ! /* Scan over all generic vect_recog_xxx_pattern functions. */ > ! for (j = 0; j < NUM_PATTERNS; j++) > ! { > vect_recog_func = vect_vect_recog_func_ptrs[j]; > vect_pattern_recog_1 (vect_recog_func, si, > &stmts_to_replace); > ! } > ! } > } > } > --- 3591,3632 ---- > loop = LOOP_VINFO_LOOP (loop_vinfo); > bbs = LOOP_VINFO_BBS (loop_vinfo); > nbbs = loop->num_nodes; > + > + /* Scan through the loop stmts, applying the pattern recognition > + functions starting at each stmt visited: */ > + for (i = 0; i < nbbs; i++) > + { > + basic_block bb = bbs[i]; > + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > + { > + /* Scan over all generic vect_recog_xxx_pattern functions. */ > + for (j = 0; j < NUM_PATTERNS; j++) > + { > + vect_recog_func = vect_vect_recog_func_ptrs[j]; > + vect_pattern_recog_1 (vect_recog_func, si, > + &stmts_to_replace); > + } > + } > + } > } > else > { > ! bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); > ! for (si = bb_vinfo->region_begin; > ! gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si)) > ! { > ! if ((stmt = gsi_stmt (si)) > && vinfo_for_stmt (stmt) > && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) > ! 
continue; > > ! /* Scan over all generic vect_recog_xxx_pattern functions. */ > ! for (j = 0; j < NUM_PATTERNS; j++) > ! { > vect_recog_func = vect_vect_recog_func_ptrs[j]; > vect_pattern_recog_1 (vect_recog_func, si, > &stmts_to_replace); > ! } > ! } > } > } > Index: gcc/config/i386/i386.c > =================================================================== > *** gcc/config/i386/i386.c.orig 2015-11-05 09:52:42.239687133 +0100 > --- gcc/config/i386/i386.c 2015-11-05 11:09:09.451774562 +0100 > *************** along with GCC; see the file COPYING3. > *** 64,69 **** > --- 64,70 ---- > #include "context.h" > #include "pass_manager.h" > #include "target-globals.h" > + #include "gimple-iterator.h" > #include "tree-vectorizer.h" > #include "shrink-wrap.h" > #include "builtins.h" > Index: gcc/testsuite/gcc.dg/vect/bb-slp-38.c > =================================================================== > *** /dev/null 1970-01-01 00:00:00.000000000 +0000 > --- gcc/testsuite/gcc.dg/vect/bb-slp-38.c 2015-11-05 14:00:48.177644327 > +0100 > *************** > *** 0 **** > --- 1,44 ---- > + /* { dg-require-effective-target vect_int } */ > + > + #include "tree-vect.h" > + > + extern void abort (void); > + > + int a[8], b[8]; > + int x; > + > + void __attribute__((noinline,noclone)) > + bar (void) > + { > + x = 1; > + } > + > + void __attribute__((noinline,noclone)) > + foo(void) > + { > + a[0] = b[0]; > + a[1] = b[0]; > + a[2] = b[3]; > + a[3] = b[3]; > + bar (); > + a[4] = b[4]; > + a[5] = b[7]; > + a[6] = b[4]; > + a[7] = b[7]; > + } > + > + int main() > + { > + int i; > + check_vect (); > + for (i = 0; i < 8; ++i) > + b[i] = i; > + foo (); > + if (a[0] != 0 || a[1] != 0 || a[2] != 3 || a[3] != 3 > + || a[4] != 4 || a[5] != 7 || a[6] != 4 || a[7] != 7) > + abort (); > + return 0; > + } > + > + /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target > vect_perm } } } */ > + /* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" > { target vect_perm } 
} } */ > Index: gcc/tree-vect-stmts.c > =================================================================== > *** gcc/tree-vect-stmts.c.orig 2015-11-02 12:37:11.074249388 +0100 > --- gcc/tree-vect-stmts.c 2015-11-05 13:29:21.413423692 +0100 > *************** vect_is_simple_use (tree operand, vec_in > *** 8196,8207 **** > dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); > } > > ! basic_block bb = gimple_bb (*def_stmt); > ! if ((is_a <loop_vec_info> (vinfo) > ! && !flow_bb_inside_loop_p (as_a <loop_vec_info> (vinfo)->loop, bb)) > ! || (is_a <bb_vec_info> (vinfo) > ! && (bb != as_a <bb_vec_info> (vinfo)->bb > ! || gimple_code (*def_stmt) == GIMPLE_PHI))) > *dt = vect_external_def; > else > { > --- 8196,8202 ---- > dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); > } > > ! if (! vect_stmt_in_region_p (vinfo, *def_stmt)) > *dt = vect_external_def; > else > { > Index: gcc/tree-vectorizer.c > =================================================================== > *** gcc/tree-vectorizer.c.orig 2015-11-04 09:23:53.724687806 +0100 > --- gcc/tree-vectorizer.c 2015-11-05 13:55:08.299817570 +0100 > *************** vect_destroy_datarefs (vec_info *vinfo) > *** 350,355 **** > --- 350,382 ---- > } > > > + /* Return whether STMT is inside the region we try to vectorize. */ > + > + bool > + vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt) > + { > + if (!gimple_bb (stmt)) > + return false; > + > + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) > + { > + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > + if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) > + return false; > + } > + else > + { > + bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); > + if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo) > + || gimple_uid (stmt) == -1U > + || gimple_code (stmt) == GIMPLE_PHI) > + return false; > + } > + > + return true; > + } > + > + > /* If LOOP has been versioned during ifcvt, return the internal call > guarding it. 
*/ > > *************** pass_slp_vectorize::execute (function *f > *** 692,697 **** > --- 719,732 ---- > scev_initialize (); > } > > + /* Mark all stmts as not belonging to the current region. */ > + FOR_EACH_BB_FN (bb, fun) > + { > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + gimple_set_uid (gsi_stmt (gsi), -1); > + } > + > init_stmt_vec_info_vec (); > > FOR_EACH_BB_FN (bb, fun) > Index: gcc/config/aarch64/aarch64.c > =================================================================== > *** gcc/config/aarch64/aarch64.c.orig 2015-10-28 11:22:25.290823112 +0100 > --- gcc/config/aarch64/aarch64.c 2015-11-06 10:24:21.539818027 +0100 > *************** > *** 52,57 **** > --- 52,58 ---- > #include "params.h" > #include "gimplify.h" > #include "dwarf2.h" > + #include "gimple-iterator.h" > #include "tree-vectorizer.h" > #include "aarch64-cost-tables.h" > #include "dumpfile.h" > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)