On 25 September 2011 14:45, Richard Guenther <[email protected]> wrote:
> On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen <[email protected]> wrote:
>> Hi,
>>
>> This patch supports an automatic choice of vector size in basic block
>> vectorization similar to the loop vectorization case.
>>
>> I am not sure about the new keyword.
>
> The testsuite one? I guess we should name them vect128, vect256, etc.,
> as testcases will be looking for an absolute size, not a relative ("half")
> one.
OK, changing it to:
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp (revision 179159)
+++ testsuite/lib/target-supports.exp (working copy)
@@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
return $et_vect_multiple_sizes_saved
}
+# Return 1 if the target supports vectors of 64 bits (result cached in et_vect64_saved).
+
+proc check_effective_target_vect64 { } {
+ global et_vect64_saved
+
+ if [info exists et_vect64_saved] {
+ verbose "check_effective_target_vect64: using cached result" 2
+ } else {
+ set et_vect64_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect64_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+ return $et_vect64_saved
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
Thanks,
Ira
>
> Richard.
>
>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> and arm-linux-gnueabi.
>>
>> Thanks,
>> Ira
>>
>> ChangeLog:
>>
>> * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
>> of vect_slp_analyze_bb here.
>> (vect_slp_analyze_bb): Loop over vector sizes calling vect_slp_analyze_bb_1.
>>
>> testsuite/ChangeLog:
>>
>> * lib/target-supports.exp (check_effective_target_vect_half_size):
>> New.
>> * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
>> of multiple vector sizes.
>> * gcc.dg/vect/bb-slp-26.c: New.
>>
>> Index: testsuite/lib/target-supports.exp
>> ===================================================================
>> --- testsuite/lib/target-supports.exp (revision 179159)
>> +++ testsuite/lib/target-supports.exp (working copy)
>> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
>> return $et_vect_multiple_sizes_saved
>> }
>>
>> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
>> +
>> +proc check_effective_target_vect_half_size { } {
>> + global et_vect_half_size_saved
>> +
>> + if [info exists et_vect_half_size_saved] {
>> + verbose "check_effective_target_vect_half_size: using cached
>> result" 2
>> + } else {
>> + set et_vect_half_size_saved 0
>> + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>> } {
>> + set et_vect_half_size_saved 1
>> + }
>> + }
>> +
>> + verbose "check_effective_target_vect_half_size: returning
>> $et_vect_half_size_saved" 2
>> + return $et_vect_half_size_saved
>> +}
>> +
>> # Return 1 if the target supports section-anchors
>>
>> proc check_effective_target_section_anchors { } {
>> Index: testsuite/gcc.dg/vect/bb-slp-26.c
>> ===================================================================
>> --- testsuite/gcc.dg/vect/bb-slp-26.c (revision 0)
>> +++ testsuite/gcc.dg/vect/bb-slp-26.c (revision 0)
>> @@ -0,0 +1,59 @@
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include <stdarg.h>
>> +#include "tree-vect.h"
>> +
>> +#define A 3
>> +#define B 4
>> +#define N 256
>> +
>> +char src[N], dst[N];
>> +
>> +void foo (char * __restrict__ dst, char * __restrict__ src, int h,
>> int stride, int dummy)
>> +{
>> + int i;
>> + h /= 16;
>> + for (i = 0; i < h; i++)
>> + {
>> + dst[0] += A*src[0] + src[stride];
>> + dst[1] += A*src[1] + src[1+stride];
>> + dst[2] += A*src[2] + src[2+stride];
>> + dst[3] += A*src[3] + src[3+stride];
>> + dst[4] += A*src[4] + src[4+stride];
>> + dst[5] += A*src[5] + src[5+stride];
>> + dst[6] += A*src[6] + src[6+stride];
>> + dst[7] += A*src[7] + src[7+stride];
>> + dst += 8;
>> + src += 8;
>> + if (dummy == 32)
>> + abort ();
>> + }
>> +}
>> +
>> +
>> +int main (void)
>> +{
>> + int i;
>> +
>> + check_vect ();
>> +
>> + for (i = 0; i < N; i++)
>> + {
>> + dst[i] = 0;
>> + src[i] = i/8;
>> + }
>> +
>> + foo (dst, src, N, 8, 0);
>> +
>> + for (i = 0; i < N/2; i++)
>> + {
>> + if (dst[i] != A * src[i] + src[i+8])
>> + abort ();
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "basic block vectorized using
>> SLP" 1 "slp" { target vect_half_size } } } */
>> +/* { dg-final { cleanup-tree-dump "slp" } } */
>> +
>> Index: testsuite/gcc.dg/vect/bb-slp-11.c
>> ===================================================================
>> --- testsuite/gcc.dg/vect/bb-slp-11.c (revision 179159)
>> +++ testsuite/gcc.dg/vect/bb-slp-11.c (working copy)
>> @@ -49,6 +49,7 @@ int main (void)
>> }
>>
>> /* { dg-final { scan-tree-dump-times "basic block vectorized using
>> SLP" 0 "slp" } } */
>> -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } }
>> */
>> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1
>> "slp" { xfail vect_multiple_sizes } } } */
>> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2
>> "slp" { target vect_multiple_sizes } } } */
>> /* { dg-final { cleanup-tree-dump "slp" } } */
>>
>> Index: tree-vect-slp.c
>> ===================================================================
>> --- tree-vect-slp.c (revision 179159)
>> +++ tree-vect-slp.c (working copy)
>> @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>>
>> /* Check if the basic block can be vectorized. */
>>
>> -bb_vec_info
>> -vect_slp_analyze_bb (basic_block bb)
>> +static bb_vec_info
>> +vect_slp_analyze_bb_1 (basic_block bb)
>> {
>> bb_vec_info bb_vinfo;
>> VEC (ddr_p, heap) *ddrs;
>> VEC (slp_instance, heap) *slp_instances;
>> slp_instance instance;
>> - int i, insns = 0;
>> - gimple_stmt_iterator gsi;
>> + int i;
>> int min_vf = 2;
>> int max_vf = MAX_VECTORIZATION_FACTOR;
>> bool data_dependence_in_bb = false;
>>
>> - current_vector_size = 0;
>> -
>> - if (vect_print_dump_info (REPORT_DETAILS))
>> - fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
>> -
>> - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> - {
>> - gimple stmt = gsi_stmt (gsi);
>> - if (!is_gimple_debug (stmt)
>> - && !gimple_nop_p (stmt)
>> - && gimple_code (stmt) != GIMPLE_LABEL)
>> - insns++;
>> - }
>> -
>> - if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
>> - {
>> - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
>> - fprintf (vect_dump, "not vectorized: too many instructions in basic
>> "
>> - "block.\n");
>> -
>> - return NULL;
>> - }
>> -
>> bb_vinfo = new_bb_vec_info (bb);
>> if (!bb_vinfo)
>> return NULL;
>> @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>> }
>>
>>
>> +bb_vec_info
>> +vect_slp_analyze_bb (basic_block bb)
>> +{
>> + bb_vec_info bb_vinfo;
>> + int insns = 0;
>> + gimple_stmt_iterator gsi;
>> + unsigned int vector_sizes;
>> +
>> + if (vect_print_dump_info (REPORT_DETAILS))
>> + fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
>> +
>> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + {
>> + gimple stmt = gsi_stmt (gsi);
>> + if (!is_gimple_debug (stmt)
>> + && !gimple_nop_p (stmt)
>> + && gimple_code (stmt) != GIMPLE_LABEL)
>> + insns++;
>> + }
>> +
>> + if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
>> + {
>> + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
>> + fprintf (vect_dump, "not vectorized: too many instructions in basic
>> "
>> + "block.\n");
>> +
>> + return NULL;
>> + }
>> +
>> + /* Autodetect first vector size we try. */
>> + current_vector_size = 0;
>> + vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
>> +
>> + while (1)
>> + {
>> + bb_vinfo = vect_slp_analyze_bb_1 (bb);
>> + if (bb_vinfo)
>> + return bb_vinfo;
>> +
>> + destroy_bb_vec_info (bb_vinfo);
>> +
>> + vector_sizes &= ~current_vector_size;
>> + if (vector_sizes == 0
>> + || current_vector_size == 0)
>> + return NULL;
>> +
>> + /* Try the next biggest vector size. */
>> + current_vector_size = 1 << floor_log2 (vector_sizes);
>> + if (vect_print_dump_info (REPORT_DETAILS))
>> + fprintf (vect_dump, "***** Re-trying analysis with "
>> + "vector size %d\n", current_vector_size);
>> + }
>> +}
>> +
>> +
>> /* SLP costs are calculated according to SLP instance unrolling factor
>> (i.e.,
>> the number of created vector stmts depends on the unrolling factor).
>> However, the actual number of vector stmts for every SLP node depends on
>>
>