On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen <ira.ro...@linaro.org> wrote:
> Hi,
>
> This patch supports an automatic choice of vector size in basic block
> vectorization similar to the loop vectorization case.
>
> I am not sure about the new keyword.
The testsuite one? I guess we should name them vect128, vect256, etc., as testcases will be looking for an absolute size, not a relative ("half") one. Richard. > Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux > and arm-linux-gnueabi. > > Thanks, > Ira > > ChangeLog: > > * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part > of vect_analyze_bb here. > (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1. > > testsuite/ChangeLog: > > * lib/target-supports.exp (check_effective_target_vect_half_size): New. > * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case > of multiple vector sizes. > * gcc.dg/vect/bb-slp-26.c: New. > > Index: testsuite/lib/target-supports.exp > =================================================================== > --- testsuite/lib/target-supports.exp (revision 179159) > +++ testsuite/lib/target-supports.exp (working copy) > @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes { > return $et_vect_multiple_sizes_saved > } > > +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints. 
> + > +proc check_effective_target_vect_half_size { } { > + global et_vect_half_size > + > + if [info exists et_vect_half_size_saved] { > + verbose "check_effective_target_vect_half_size: using cached result" > 2 > + } else { > + set et_vect_half_size_saved 0 > + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } > { > + set et_vect_half_size_saved 1 > + } > + } > + > + verbose "check_effective_target_vect_half_size: returning > $et_vect_half_size_saved" 2 > + return $et_vect_half_size_saved > +} > + > # Return 1 if the target supports section-anchors > > proc check_effective_target_section_anchors { } { > Index: testsuite/gcc.dg/vect/bb-slp-26.c > =================================================================== > --- testsuite/gcc.dg/vect/bb-slp-26.c (revision 0) > +++ testsuite/gcc.dg/vect/bb-slp-26.c (revision 0) > @@ -0,0 +1,59 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +#define A 3 > +#define B 4 > +#define N 256 > + > +char src[N], dst[N]; > + > +void foo (char * __restrict__ dst, char * __restrict__ src, int h, > int stride, int dummy) > +{ > + int i; > + h /= 16; > + for (i = 0; i < h; i++) > + { > + dst[0] += A*src[0] + src[stride]; > + dst[1] += A*src[1] + src[1+stride]; > + dst[2] += A*src[2] + src[2+stride]; > + dst[3] += A*src[3] + src[3+stride]; > + dst[4] += A*src[4] + src[4+stride]; > + dst[5] += A*src[5] + src[5+stride]; > + dst[6] += A*src[6] + src[6+stride]; > + dst[7] += A*src[7] + src[7+stride]; > + dst += 8; > + src += 8; > + if (dummy == 32) > + abort (); > + } > +} > + > + > +int main (void) > +{ > + int i; > + > + check_vect (); > + > + for (i = 0; i < N; i++) > + { > + dst[i] = 0; > + src[i] = i/8; > + } > + > + foo (dst, src, N, 8, 0); > + > + for (i = 0; i < N/2; i++) > + { > + if (dst[i] != A * src[i] + src[i+8]) > + abort (); > + } > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "basic block vectorized using > SLP" 1 "slp" { 
target vect_half_size } } } */ > +/* { dg-final { cleanup-tree-dump "slp" } } */ > + > Index: testsuite/gcc.dg/vect/bb-slp-11.c > =================================================================== > --- testsuite/gcc.dg/vect/bb-slp-11.c (revision 179159) > +++ testsuite/gcc.dg/vect/bb-slp-11.c (working copy) > @@ -49,6 +49,7 @@ int main (void) > } > > /* { dg-final { scan-tree-dump-times "basic block vectorized using > SLP" 0 "slp" } } */ > -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ > +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 > "slp" { xfail vect_multiple_sizes } } } */ > +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 > "slp" { target vect_multiple_sizes } } } */ > /* { dg-final { cleanup-tree-dump "slp" } } */ > > Index: tree-vect-slp.c > =================================================================== > --- tree-vect-slp.c (revision 179159) > +++ tree-vect-slp.c (working copy) > @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb > > /* Check if the basic block can be vectorized. 
*/ > > -bb_vec_info > -vect_slp_analyze_bb (basic_block bb) > +static bb_vec_info > +vect_slp_analyze_bb_1 (basic_block bb) > { > bb_vec_info bb_vinfo; > VEC (ddr_p, heap) *ddrs; > VEC (slp_instance, heap) *slp_instances; > slp_instance instance; > - int i, insns = 0; > - gimple_stmt_iterator gsi; > + int i; > int min_vf = 2; > int max_vf = MAX_VECTORIZATION_FACTOR; > bool data_dependence_in_bb = false; > > - current_vector_size = 0; > - > - if (vect_print_dump_info (REPORT_DETAILS)) > - fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); > - > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - gimple stmt = gsi_stmt (gsi); > - if (!is_gimple_debug (stmt) > - && !gimple_nop_p (stmt) > - && gimple_code (stmt) != GIMPLE_LABEL) > - insns++; > - } > - > - if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) > - { > - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) > - fprintf (vect_dump, "not vectorized: too many instructions in basic " > - "block.\n"); > - > - return NULL; > - } > - > bb_vinfo = new_bb_vec_info (bb); > if (!bb_vinfo) > return NULL; > @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb > } > > > +bb_vec_info > +vect_slp_analyze_bb (basic_block bb) > +{ > + bb_vec_info bb_vinfo; > + int insns = 0; > + gimple_stmt_iterator gsi; > + unsigned int vector_sizes; > + > + if (vect_print_dump_info (REPORT_DETAILS)) > + fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); > + > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gimple stmt = gsi_stmt (gsi); > + if (!is_gimple_debug (stmt) > + && !gimple_nop_p (stmt) > + && gimple_code (stmt) != GIMPLE_LABEL) > + insns++; > + } > + > + if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) > + { > + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) > + fprintf (vect_dump, "not vectorized: too many instructions in basic " > + "block.\n"); > + > + return NULL; > + } > + > + /* Autodetect first vector size we try. 
*/ > + current_vector_size = 0; > + vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); > + > + while (1) > + { > + bb_vinfo = vect_slp_analyze_bb_1 (bb); > + if (bb_vinfo) > + return bb_vinfo; > + > + destroy_bb_vec_info (bb_vinfo); > + > + vector_sizes &= ~current_vector_size; > + if (vector_sizes == 0 > + || current_vector_size == 0) > + return NULL; > + > + /* Try the next biggest vector size. */ > + current_vector_size = 1 << floor_log2 (vector_sizes); > + if (vect_print_dump_info (REPORT_DETAILS)) > + fprintf (vect_dump, "***** Re-trying analysis with " > + "vector size %d\n", current_vector_size); > + } > +} > + > + > /* SLP costs are calculated according to SLP instance unrolling factor (i.e., > the number of created vector stmts depends on the unrolling factor). > However, the actual number of vector stmts for every SLP node depends on >