On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen <ira.ro...@linaro.org> wrote:
> Hi,
>
> This patch supports an automatic choice of vector size in basic block
> vectorization similar to the loop vectorization case.
>
> I am not sure about the new keyword.

The testsuite one?  I guess we should name them vect128, vect256, etc.,
as testcases will be looking for an absolute size, not a relative ("half") one.

Richard.

> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
> and arm-linux-gnueabi.
>
> Thanks,
> Ira
>
> ChangeLog:
>
>        * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
>        of vect_slp_analyze_bb here.
>        (vect_slp_analyze_bb): Loop over vector sizes calling
>        vect_slp_analyze_bb_1.
>
> testsuite/ChangeLog:
>
>        * lib/target-supports.exp (check_effective_target_vect_half_size): New.
>        * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
>        of multiple vector sizes.
>        * gcc.dg/vect/bb-slp-26.c: New.
>
> Index: testsuite/lib/target-supports.exp
> ===================================================================
> --- testsuite/lib/target-supports.exp   (revision 179159)
> +++ testsuite/lib/target-supports.exp   (working copy)
> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
>     return $et_vect_multiple_sizes_saved
>  }
>
> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
> +
> +proc check_effective_target_vect_half_size { } {
> +    global et_vect_half_size
> +
> +    if [info exists et_vect_half_size_saved] {
> +        verbose "check_effective_target_vect_half_size: using cached result" 
> 2
> +    } else {
> +        set et_vect_half_size_saved 0
> +        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } 
> {
> +           set et_vect_half_size_saved 1
> +        }
> +    }
> +
> +    verbose "check_effective_target_vect_half_size: returning
> $et_vect_half_size_saved" 2
> +    return $et_vect_half_size_saved
> +}
> +
>  # Return 1 if the target supports section-anchors
>
>  proc check_effective_target_section_anchors { } {
> Index: testsuite/gcc.dg/vect/bb-slp-26.c
> ===================================================================
> --- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
> +++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
> @@ -0,0 +1,59 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define A 3
> +#define B 4
> +#define N 256
> +
> +char src[N], dst[N];
> +
> +void foo (char * __restrict__ dst, char * __restrict__ src, int h,
> int stride, int dummy)
> +{
> +  int i;
> +  h /= 16;
> +  for (i = 0; i < h; i++)
> +    {
> +      dst[0] += A*src[0] + src[stride];
> +      dst[1] += A*src[1] + src[1+stride];
> +      dst[2] += A*src[2] + src[2+stride];
> +      dst[3] += A*src[3] + src[3+stride];
> +      dst[4] += A*src[4] + src[4+stride];
> +      dst[5] += A*src[5] + src[5+stride];
> +      dst[6] += A*src[6] + src[6+stride];
> +      dst[7] += A*src[7] + src[7+stride];
> +      dst += 8;
> +      src += 8;
> +      if (dummy == 32)
> +        abort ();
> +   }
> +}
> +
> +
> +int main (void)
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +       dst[i] = 0;
> +       src[i] = i/8;
> +    }
> +
> +  foo (dst, src, N, 8, 0);
> +
> +  for (i = 0; i < N/2; i++)
> +    {
> +      if (dst[i] != A * src[i] + src[i+8])
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "basic block vectorized using
> SLP" 1 "slp" { target vect_half_size } } } */
> +/* { dg-final { cleanup-tree-dump "slp" } } */
> +
> Index: testsuite/gcc.dg/vect/bb-slp-11.c
> ===================================================================
> --- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
> +++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
> @@ -49,6 +49,7 @@ int main (void)
>  }
>
>  /* { dg-final { scan-tree-dump-times "basic block vectorized using
> SLP" 0 "slp" } } */
> -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1
> "slp" { xfail vect_multiple_sizes } } } */
> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2
> "slp" { target vect_multiple_sizes } } } */
>  /* { dg-final { cleanup-tree-dump "slp" } } */
>
> Index: tree-vect-slp.c
> ===================================================================
> --- tree-vect-slp.c     (revision 179159)
> +++ tree-vect-slp.c     (working copy)
> @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>
>  /* Check if the basic block can be vectorized.  */
>
> -bb_vec_info
> -vect_slp_analyze_bb (basic_block bb)
> +static bb_vec_info
> +vect_slp_analyze_bb_1 (basic_block bb)
>  {
>   bb_vec_info bb_vinfo;
>   VEC (ddr_p, heap) *ddrs;
>   VEC (slp_instance, heap) *slp_instances;
>   slp_instance instance;
> -  int i, insns = 0;
> -  gimple_stmt_iterator gsi;
> +  int i;
>   int min_vf = 2;
>   int max_vf = MAX_VECTORIZATION_FACTOR;
>   bool data_dependence_in_bb = false;
>
> -  current_vector_size = 0;
> -
> -  if (vect_print_dump_info (REPORT_DETAILS))
> -    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
> -
> -  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -    {
> -      gimple stmt = gsi_stmt (gsi);
> -      if (!is_gimple_debug (stmt)
> -         && !gimple_nop_p (stmt)
> -         && gimple_code (stmt) != GIMPLE_LABEL)
> -       insns++;
> -    }
> -
> -  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
> -    {
> -      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> -        fprintf (vect_dump, "not vectorized: too many instructions in basic "
> -                            "block.\n");
> -
> -      return NULL;
> -    }
> -
>   bb_vinfo = new_bb_vec_info (bb);
>   if (!bb_vinfo)
>     return NULL;
> @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>  }
>
>
> +bb_vec_info
> +vect_slp_analyze_bb (basic_block bb)
> +{
> +  bb_vec_info bb_vinfo;
> +  int insns = 0;
> +  gimple_stmt_iterator gsi;
> +  unsigned int vector_sizes;
> +
> +  if (vect_print_dump_info (REPORT_DETAILS))
> +    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
> +
> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      if (!is_gimple_debug (stmt)
> +          && !gimple_nop_p (stmt)
> +          && gimple_code (stmt) != GIMPLE_LABEL)
> +        insns++;
> +    }
> +
> +  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
> +    {
> +      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +        fprintf (vect_dump, "not vectorized: too many instructions in basic "
> +                            "block.\n");
> +
> +      return NULL;
> +    }
> +
> +  /* Autodetect first vector size we try.  */
> +  current_vector_size = 0;
> +  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
> +
> +  while (1)
> +    {
> +      bb_vinfo = vect_slp_analyze_bb_1 (bb);
> +      if (bb_vinfo)
> +        return bb_vinfo;
> +
> +      destroy_bb_vec_info (bb_vinfo);
> +
> +      vector_sizes &= ~current_vector_size;
> +      if (vector_sizes == 0
> +          || current_vector_size == 0)
> +        return NULL;
> +
> +      /* Try the next biggest vector size.  */
> +      current_vector_size = 1 << floor_log2 (vector_sizes);
> +      if (vect_print_dump_info (REPORT_DETAILS))
> +        fprintf (vect_dump, "***** Re-trying analysis with "
> +                 "vector size %d\n", current_vector_size);
> +    }
> +}
> +
> +
>  /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
>    the number of created vector stmts depends on the unrolling factor).
>    However, the actual number of vector stmts for every SLP node depends on
>

Reply via email to