On Wed, 28 Jun 2023, Tamar Christina wrote:

> Hi,
> 
> With the patch enabling the vectorization of early-breaks, we'd like to allow
> bitfield lowering in such loops, which requires the relaxation of allowing
> multiple exits when doing so.  In order to avoid a similar issue to PR107275,
> the code that rejects loops with certain types of gimple_stmts was hoisted 
> from
> 'if_convertible_loop_p_1' to 'get_loop_body_in_if_conv_order', to avoid trying
> to lower bitfields in loops we are not going to vectorize anyway.
> 
> This also ensures 'ifcvt_local_dec' doesn't accidentally remove statements it
> shouldn't as it will never come across them.  I made sure to add a comment to
> make clear that there is a direct connection between the two and if we were to
> enable vectorization of any other gimple statement we should make sure both
> handle it.
> 
> NOTE: This patch accepted before but never committed because it is a no-op
> without the early break patch.   This is a respun version of Andre's patch and
> rebased to changes in ifcvt and updated to handle multiple exits.
> 
> Bootstrappend and regression tested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu and no issues.

OK.

> gcc/ChangeLog:
> 
>       * tree-if-conv.cc (if_convertible_loop_p_1): Move check from here ...
>       (get_loop_body_if_conv_order): ... to here.
>       (if_convertible_loop_p): Remove single_exit check.
>       (tree_if_conversion): Move single_exit check to if-conversion part and
>       support multiple exits.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/vect/vect-bitfield-read-1-not.c: New test.
>       * gcc.dg/vect/vect-bitfield-read-2-not.c: New test.
>       * gcc.dg/vect/vect-bitfield-read-8.c: New test.
>       * gcc.dg/vect/vect-bitfield-read-9.c: New test.
> 
> Co-Authored-By:  Andre Vieira <andre.simoesdiasvie...@arm.com>
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c 
> b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..0d91067ebb27b1db2b2352975c43bce8b4171e3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> @@ -0,0 +1,60 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> +    char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define RES 56
> +struct s A[N]
> +  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> +    int res = 0;
> +    for (int i = 0; i < n; ++i)
> +      {
> +     switch (ptr[i].a)
> +       {
> +       case 0:
> +         res += ptr[i].a + 1;
> +         break;
> +       case 1:
> +       case 2:
> +       case 3:
> +         res += ptr[i].a;
> +         break;
> +       default:
> +         return 0;
> +       }
> +      }
> +    return res;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  if (f(&A[0], N) != RES)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c 
> b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..4ac7b3fc0dfd1c9d0b5e94a2ba6a745545577ec1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> +    char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define RES 48
> +struct s A[N]
> +  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> +    int res = 0;
> +    for (int i = 0; i < n; ++i)
> +      {
> +     asm volatile ("" ::: "memory");
> +     res += ptr[i].a;
> +      }
> +    return res;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  if (f(&A[0], N) != RES)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c 
> b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..52cfd33d937ae90f3fe9556716c90e098b768ac8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s { int i : 31; };
> +
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define ELT4 {4}
> +#define N 32
> +#define RES 25
> +struct s A[N]
> +  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT4, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> +    int res = 0;
> +    for (int i = 0; i < n; ++i)
> +      {
> +     if (ptr[i].i == 4)
> +       return res;
> +     res += ptr[i].i;
> +      }
> +
> +    return res;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  if (f(&A[0], N) != RES)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c 
> b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..ab814698131a5905def181eeed85d8a3c62b924b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
> @@ -0,0 +1,51 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> +    unsigned i : 31;
> +    char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0x7FFFFFFFUL, 0}
> +#define ELT1 {0x7FFFFFFFUL, 1}
> +#define ELT2 {0x7FFFFFFFUL, 2}
> +#define ELT3 {0x7FFFFFFFUL, 3}
> +#define ELT4 {0x7FFFFFFFUL, 4}
> +#define RES 9
> +struct s A[N]
> +  = { ELT0, ELT4, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> +      ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> +    int res = 0;
> +    for (int i = 0; i < n; ++i)
> +      {
> +     if (ptr[i].a)
> +       return 9;
> +     res += ptr[i].a;
> +      }
> +    return res;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  if (f(&A[0], N) != RES)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
> +
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 
> e342532a343a3c066142adeec5fdfaf736a653e5..cdb0fe4c29dfa531e3277925022d127b13ffcc16
>  100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -586,7 +586,7 @@ add_to_dst_predicate_list (class loop *loop, edge e,
>  /* Return true if one of the successor edges of BB exits LOOP.  */
>  
>  static bool
> -bb_with_exit_edge_p (class loop *loop, basic_block bb)
> +bb_with_exit_edge_p (const class loop *loop, basic_block bb)
>  {
>    edge e;
>    edge_iterator ei;
> @@ -1268,6 +1268,44 @@ get_loop_body_in_if_conv_order (const class loop *loop)
>      }
>    free (blocks_in_bfs_order);
>    BITMAP_FREE (visited);
> +
> +  /* Go through loop and reject if-conversion or lowering of bitfields if we
> +     encounter statements we do not believe the vectorizer will be able to
> +     handle.  If adding a new type of statement here, make sure
> +     'ifcvt_local_dce' is also able to handle it propertly.  */
> +  for (index = 0; index < loop->num_nodes; index++)
> +    {
> +      basic_block bb = blocks[index];
> +      gimple_stmt_iterator gsi;
> +
> +      bool may_have_nonlocal_labels
> +     = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +     switch (gimple_code (gsi_stmt (gsi)))
> +       {
> +       case GIMPLE_LABEL:
> +         if (!may_have_nonlocal_labels)
> +           {
> +             tree label
> +               = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
> +             if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
> +               {
> +                 free (blocks);
> +                 return NULL;
> +               }
> +           }
> +         /* Fallthru.  */
> +       case GIMPLE_ASSIGN:
> +       case GIMPLE_CALL:
> +       case GIMPLE_DEBUG:
> +       case GIMPLE_COND:
> +         gimple_set_uid (gsi_stmt (gsi), 0);
> +         break;
> +       default:
> +         free (blocks);
> +         return NULL;
> +       }
> +    }
>    return blocks;
>  }
>  
> @@ -1438,36 +1476,6 @@ if_convertible_loop_p_1 (class loop *loop, 
> vec<data_reference_p> *refs)
>       exit_bb = bb;
>      }
>  
> -  for (i = 0; i < loop->num_nodes; i++)
> -    {
> -      basic_block bb = ifc_bbs[i];
> -      gimple_stmt_iterator gsi;
> -
> -      bool may_have_nonlocal_labels
> -     = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -     switch (gimple_code (gsi_stmt (gsi)))
> -       {
> -       case GIMPLE_LABEL:
> -         if (!may_have_nonlocal_labels)
> -           {
> -             tree label
> -               = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
> -             if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
> -               return false;
> -           }
> -         /* Fallthru.  */
> -       case GIMPLE_ASSIGN:
> -       case GIMPLE_CALL:
> -       case GIMPLE_DEBUG:
> -       case GIMPLE_COND:
> -         gimple_set_uid (gsi_stmt (gsi), 0);
> -         break;
> -       default:
> -         return false;
> -       }
> -    }
> -
>    data_reference_p dr;
>  
>    innermost_DR_map
> @@ -1579,14 +1587,6 @@ if_convertible_loop_p (class loop *loop, 
> vec<data_reference_p> *refs)
>        return false;
>      }
>  
> -  /* More than one loop exit is too much to handle.  */
> -  if (!single_exit (loop))
> -    {
> -      if (dump_file && (dump_flags & TDF_DETAILS))
> -     fprintf (dump_file, "multiple exits\n");
> -      return false;
> -    }
> -
>    /* If one of the loop header's edge is an exit edge then do not
>       apply if-conversion.  */
>    FOR_EACH_EDGE (e, ei, loop->header->succs)
> @@ -3566,9 +3566,6 @@ tree_if_conversion (class loop *loop, vec<gimple *> 
> *preds)
>       aggressive_if_conv = true;
>      }
>  
> -  if (!single_exit (loop))
> -    goto cleanup;
> -
>    /* If there are more than two BBs in the loop then there is at least one if
>       to convert.  */
>    if (loop->num_nodes > 2
> @@ -3588,15 +3585,25 @@ tree_if_conversion (class loop *loop, vec<gimple *> 
> *preds)
>  
>    if (loop->num_nodes > 2)
>      {
> -      need_to_ifcvt = true;
> +      /* More than one loop exit is too much to handle.  */
> +      if (!single_exit (loop))
> +     {
> +       if (dump_file && (dump_flags & TDF_DETAILS))
> +         fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
> +     }
> +      else
> +     {
> +       need_to_ifcvt = true;
>  
> -      if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt 
> (if_conversion_tree))
> -     goto cleanup;
> +       if (!if_convertible_loop_p (loop, &refs)
> +           || !dbg_cnt (if_conversion_tree))
> +         goto cleanup;
>  
> -      if ((need_to_predicate || any_complicated_phi)
> -       && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
> -           || loop->dont_vectorize))
> -     goto cleanup;
> +       if ((need_to_predicate || any_complicated_phi)
> +           && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
> +               || loop->dont_vectorize))
> +         goto cleanup;
> +     }
>      }
>  
>    if ((flag_tree_loop_vectorize || loop->force_vectorize)
> @@ -3687,7 +3694,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> 
> *preds)
>       PHIs, those are to be kept in sync with the non-if-converted copy.
>       ???  We'll still keep dead stores though.  */
>    exit_bbs = BITMAP_ALLOC (NULL);
> -  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
> +  for (edge exit : get_loop_exit_edges (loop))
> +    bitmap_set_bit (exit_bbs, exit->dest->index);
>    bitmap_set_bit (exit_bbs, loop->latch->index);
>  
>    std::pair <tree, tree> *name_pair;
> 
> 
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Reply via email to