This finally fixes PR59058.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2013-12-05  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/59058
        * tree-vectorizer.h (struct _loop_vec_info): Add num_itersm1
        member.
        (LOOP_VINFO_NITERSM1): New macro.
        * tree-vect-loop-manip.c (slpeel_tree_peel_loop_to_edge): Express
        the vector loop entry test in terms of scalar latch executions.
        (vect_do_peeling_for_alignment): Update LOOP_VINFO_NITERSM1.
        * tree-vect-loop.c (vect_get_loop_niters): Also return the
        number of latch executions.
        (new_loop_vec_info): Initialize LOOP_VINFO_NITERSM1.
        (vect_analyze_loop_form): Likewise.
        (vect_generate_tmps_on_preheader): Compute the number of
        vectorized iterations differently.

        * gcc.dg/torture/pr59058.c: New testcase.

Index: trunk/gcc/testsuite/gcc.dg/torture/pr59058.c
===================================================================
*** /dev/null   1970-01-01 00:00:00.000000000 +0000
--- trunk/gcc/testsuite/gcc.dg/torture/pr59058.c        2013-12-05 
14:51:54.231300709 +0100
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do run } */
+ 
+ extern void abort (void);
+ 
+ short b = 0;
+ 
+ int
+ main ()
+ {
+   int c = 0;
+ l1:
+   b++;
+   c |= b;
+   if (b)
+     goto l1;
+   if (c != -1)
+     abort ();
+   return 0;
+ }
Index: trunk/gcc/tree-vect-loop-manip.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop-manip.c       2013-12-05 12:27:23.000000000 
+0100
--- trunk/gcc/tree-vect-loop-manip.c    2013-12-05 14:50:59.468666726 +0100
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1061,1067 ****
    gimple_stmt_iterator gsi;
    edge exit_e = single_exit (loop);
    source_location loop_loc;
-   tree cost_pre_condition = NULL_TREE;
    /* There are many aspects to how likely the first loop is going to be 
executed.
       Without histogram we can't really do good job.  Simply set it to
       2/3, so the first loop is not reordered to the end of function and
--- 1061,1066 ----
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1263,1283 ****
    /* Epilogue peeling.  */
    if (!update_first_loop_count)
      {
        pre_condition =
!       fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
!                    build_int_cst (TREE_TYPE (*first_niters), 0));
!       if (check_profitability)
!       {
!         tree scalar_loop_iters
!           = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
!                                       (loop_vec_info_for_loop (loop)));
!         cost_pre_condition =
!           fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
!                        build_int_cst (TREE_TYPE (scalar_loop_iters), th));
! 
!         pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
!                                      cost_pre_condition, pre_condition);
!       }
        if (cond_expr)
        {
          pre_condition =
--- 1262,1278 ----
    /* Epilogue peeling.  */
    if (!update_first_loop_count)
      {
+       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
+       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
+       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+       limit = limit + 1;
+       if (check_profitability
+         && th > limit)
+       limit = th;
        pre_condition =
!       fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
!                    build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
        if (cond_expr)
        {
          pre_condition =
*************** vect_do_peeling_for_alignment (loop_vec_
*** 1922,1927 ****
--- 1917,1925 ----
    /* Update number of times loop executes.  */
    LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
+   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
+               TREE_TYPE (ni_name),
+               LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
  
    if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
      wide_prolog_niters = niters_of_prolog_loop;
Index: trunk/gcc/tree-vect-loop.c
===================================================================
*** trunk.orig/gcc/tree-vect-loop.c     2013-12-05 12:27:23.000000000 +0100
--- trunk/gcc/tree-vect-loop.c  2013-12-05 14:36:14.917389587 +0100
*************** vect_analyze_scalar_cycles (loop_vec_inf
*** 791,802 ****
  /* Function vect_get_loop_niters.
  
     Determine how many iterations the loop is executed and place it
!    in NUMBER_OF_ITERATIONS.
  
     Return the loop exit condition.  */
  
  static gimple
! vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
  {
    tree niters;
  
--- 791,804 ----
  /* Function vect_get_loop_niters.
  
     Determine how many iterations the loop is executed and place it
!    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
!    in NUMBER_OF_ITERATIONSM1.
  
     Return the loop exit condition.  */
  
  static gimple
! vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
!                     tree *number_of_iterationsm1)
  {
    tree niters;
  
*************** vect_get_loop_niters (struct loop *loop,
*** 805,816 ****
                     "=== get_loop_niters ===\n");
  
    niters = number_of_latch_executions (loop);
    /* We want the number of loop header executions which is the number
       of latch executions plus one.
       ???  For UINT_MAX latch executions this number overflows to zero
       for loops like do { n++; } while (n != 0);  */
    if (niters && !chrec_contains_undetermined (niters))
!     niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
                          build_int_cst (TREE_TYPE (niters), 1));
    *number_of_iterations = niters;
  
--- 807,820 ----
                     "=== get_loop_niters ===\n");
  
    niters = number_of_latch_executions (loop);
+   *number_of_iterationsm1 = niters;
+ 
    /* We want the number of loop header executions which is the number
       of latch executions plus one.
       ???  For UINT_MAX latch executions this number overflows to zero
       for loops like do { n++; } while (n != 0);  */
    if (niters && !chrec_contains_undetermined (niters))
!     niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr 
(niters),
                          build_int_cst (TREE_TYPE (niters), 1));
    *number_of_iterations = niters;
  
*************** new_loop_vec_info (struct loop *loop)
*** 916,921 ****
--- 920,926 ----
     gcc_assert (nbbs == loop->num_nodes);
  
    LOOP_VINFO_BBS (res) = bbs;
+   LOOP_VINFO_NITERSM1 (res) = NULL;
    LOOP_VINFO_NITERS (res) = NULL;
    LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
    LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
*************** vect_analyze_loop_form (struct loop *loo
*** 1071,1077 ****
  {
    loop_vec_info loop_vinfo;
    gimple loop_cond;
!   tree number_of_iterations = NULL;
    loop_vec_info inner_loop_vinfo = NULL;
  
    if (dump_enabled_p ())
--- 1076,1082 ----
  {
    loop_vec_info loop_vinfo;
    gimple loop_cond;
!   tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
    loop_vec_info inner_loop_vinfo = NULL;
  
    if (dump_enabled_p ())
*************** vect_analyze_loop_form (struct loop *loo
*** 1246,1252 ****
        }
      }
  
!   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
    if (!loop_cond)
      {
        if (dump_enabled_p ())
--- 1251,1258 ----
        }
      }
  
!   loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
!                                   &number_of_iterationsm1);
    if (!loop_cond)
      {
        if (dump_enabled_p ())
*************** vect_analyze_loop_form (struct loop *loo
*** 1280,1285 ****
--- 1286,1292 ----
      }
  
    loop_vinfo = new_loop_vec_info (loop);
+   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
    LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
    LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
  
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5637,5648 ****
    tree var;
    tree ratio_name;
    tree ratio_mult_vf_name;
-   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    tree log_vf;
  
!   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
  
    /* If epilogue loop is required because of data accesses with gaps, we
       subtract one iteration from the total number of iterations here for
--- 5644,5654 ----
    tree var;
    tree ratio_name;
    tree ratio_mult_vf_name;
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    tree log_vf;
  
!   log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
  
    /* If epilogue loop is required because of data accesses with gaps, we
       subtract one iteration from the total number of iterations here for
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5654,5660 ****
                                       build_one_cst (TREE_TYPE (ni_name)));
        if (!is_gimple_val (ni_minus_gap_name))
        {
!         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
            gimple stmts = NULL;
            ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
                                                    true, var);
--- 5660,5666 ----
                                       build_one_cst (TREE_TYPE (ni_name)));
        if (!is_gimple_val (ni_minus_gap_name))
        {
!         var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
            gimple stmts = NULL;
            ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
                                                    true, var);
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5665,5676 ****
      ni_minus_gap_name = ni_name;
  
    /* Create: ratio = ni >> log2(vf) */
! 
!   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
!                           ni_minus_gap_name, log_vf);
    if (!is_gimple_val (ratio_name))
      {
!       var = create_tmp_var (TREE_TYPE (ni), "bnd");
        gimple stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
        gsi_insert_seq_on_edge_immediate (pe, stmts);
--- 5671,5692 ----
      ni_minus_gap_name = ni_name;
  
    /* Create: ratio = ni >> log2(vf) */
!   /* ???  As we have ni == number of latch executions + 1, ni could
!      have overflown to zero.  So avoid computing ratio based on ni
!      but compute it using the fact that we know ratio will be at least
!      one, thus via (ni - vf) >> log2(vf) + 1.  */
!   ratio_name
!     = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
!                  fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
!                               fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
!                                            ni_minus_gap_name,
!                                            build_int_cst
!                                              (TREE_TYPE (ni_name), vf)),
!                               log_vf),
!                  build_int_cst (TREE_TYPE (ni_name), 1));
    if (!is_gimple_val (ratio_name))
      {
!       var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
        gimple stmts = NULL;
        ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
        gsi_insert_seq_on_edge_immediate (pe, stmts);
*************** vect_generate_tmps_on_preheader (loop_ve
*** 5685,5691 ****
                                        ratio_name, log_vf);
        if (!is_gimple_val (ratio_mult_vf_name))
        {
!         var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
          gimple stmts = NULL;
          ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
                                                     true, var);
--- 5701,5707 ----
                                        ratio_name, log_vf);
        if (!is_gimple_val (ratio_mult_vf_name))
        {
!         var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
          gimple stmts = NULL;
          ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
                                                     true, var);
Index: trunk/gcc/tree-vectorizer.h
===================================================================
*** trunk.orig/gcc/tree-vectorizer.h    2013-11-28 13:44:24.000000000 +0100
--- trunk/gcc/tree-vectorizer.h 2013-12-05 14:34:07.955901102 +0100
*************** typedef struct _loop_vec_info {
*** 250,257 ****
--- 250,260 ----
    /* The loop basic blocks.  */
    basic_block *bbs;
  
+   /* Number of latch executions.  */
+   tree num_itersm1;
    /* Number of iterations.  */
    tree num_iters;
+   /* Number of iterations of the original loop.  */
    tree num_iters_unchanged;
  
    /* Minimum number of iterations below which vectorization is expected to
*************** typedef struct _loop_vec_info {
*** 349,357 ****
  /* Access Functions.  */
  #define LOOP_VINFO_LOOP(L)                 (L)->loop
  #define LOOP_VINFO_BBS(L)                  (L)->bbs
  #define LOOP_VINFO_NITERS(L)               (L)->num_iters
! /* Since LOOP_VINFO_NITERS can change after prologue peeling
!    retain total unchanged scalar loop iterations for cost model.  */
  #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
  #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
  #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
--- 352,362 ----
  /* Access Functions.  */
  #define LOOP_VINFO_LOOP(L)                 (L)->loop
  #define LOOP_VINFO_BBS(L)                  (L)->bbs
+ #define LOOP_VINFO_NITERSM1(L)             (L)->num_itersm1
  #define LOOP_VINFO_NITERS(L)               (L)->num_iters
! /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
!    prologue peeling retain total unchanged scalar loop iterations for
!    cost model.  */
  #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
  #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
  #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable

Reply via email to