Re: Loop stride optimization hint

Martin Jambor Wed, 12 Sep 2012 14:58:03 -0700

Hi,

On Wed, Sep 12, 2012 at 07:57:16PM +0200, Jan Hubicka wrote:
> Hi,
> for Fortran one of common reason to inline is because array descriptor is 
> known and defines
> loop stride.  This patch makes ipa-inline-analysis to notice these cases.
> 
> Bootstrapped/regtested x86_64-linux, will commit it tomorrow if there are no 
> complains.
> 
> Honza
> 
>       * ipa-inline-analysis.c (dump_inline_hints): Dump loop stride.
>       (set_hint_predicate): New function.
>       (reset_inline_summary): Reset loop stride.
>       (remap_predicate_after_duplication): New function.
>       (remap_hint_predicate_after_duplication): New function.
>       (inline_node_duplication_hook): Update.
>       (dump_inline_summary): Dump stride summaries.
>       (estimate_function_body_sizes): Compute strides.
>       (remap_hint_predicate): New function.
>       (inline_merge_summary): Use it.
>       (inline_read_section): Read stride.
>       (inline_write_summary): Write stride.
>       * ipa-inline.c (want_inline_small_function_p): Handle strides.
>       (edge_badness): Likewise.
>       * ipa-inline.h (inline_hints_vals): Add stride hint.
>       (inline_summary): Update stride.
> 
>       * gcc.dg/ipa/inlinehint-2.c: New testcase.


...

> *************** estimate_function_body_sizes (struct cgr
> *** 2390,2395 ****
> --- 2442,2448 ----
>         struct loop *loop;
>         loop_iterator li;
>         predicate loop_iterations = true_predicate ();
> +       predicate loop_stride = true_predicate ();
>   
>         if (dump_file && (dump_flags & TDF_DETAILS))
>       flow_loops_dump (dump_file, NULL, 0);
> *************** estimate_function_body_sizes (struct cgr
> *** 2398,2405 ****
>       {
>             VEC (edge, heap) *exits;
>             edge ex;
> !       unsigned int j;
>         struct tree_niter_desc niter_desc;
>   
>         exits = get_loop_exit_edges (loop);
>             FOR_EACH_VEC_ELT (edge, exits, j, ex)
> --- 2451,2459 ----
>       {
>             VEC (edge, heap) *exits;
>             edge ex;
> !       unsigned int j, i;
>         struct tree_niter_desc niter_desc;
> +       basic_block *body = get_loop_body (loop);
>   
>         exits = get_loop_exit_edges (loop);
>             FOR_EACH_VEC_ELT (edge, exits, j, ex)
> *************** estimate_function_body_sizes (struct cgr
> *** 2416,2427 ****
>                 loop_iterations = and_predicates (info->conds, 
> &loop_iterations, &will_be_nonconstant);
>             }
>             VEC_free (edge, heap, exits);
>       }
> !       if (!true_predicate_p (&loop_iterations))
> !     {
> !           inline_summary (node)->loop_iterations = (struct predicate 
> *)pool_alloc (edge_predicate_pool);
> !           *inline_summary (node)->loop_iterations = loop_iterations;
> !     }
>         scev_finalize ();
>       }
>     inline_summary (node)->self_time = time;
> --- 2470,2508 ----
>                 loop_iterations = and_predicates (info->conds, 
> &loop_iterations, &will_be_nonconstant);
>             }
>             VEC_free (edge, heap, exits);
> + 
> +           for (i = 0; i < loop->num_nodes; i++)
> +         {
> +           gimple_stmt_iterator gsi;
> +           for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next 
> (&gsi))
> +             {
> +               gimple stmt = gsi_stmt (gsi);
> +               affine_iv iv;
> +               ssa_op_iter iter;
> +               tree use;
> + 
> +               FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> +                 {
> +                   predicate will_be_nonconstant;
> + 
> +                   if (!simple_iv (loop, loop_containing_stmt (stmt), use, 
> &iv, true)
> +                       || is_gimple_min_invariant (iv.step))
> +                     continue;
> +                   will_be_nonconstant
> +                    = will_be_nonconstant_expr_predicate (parms_info, info,
> +                                                          iv.step, 
> nonconstant_names);
> +                   if (!true_predicate_p (&will_be_nonconstant)
> +                       && !false_predicate_p (&will_be_nonconstant))
> +                     /* This is slightly inprecise.  We may want to 
> represent each loop with
> +                        independent predicate.  */
> +                     loop_stride = and_predicates (info->conds, 
> &loop_stride, &will_be_nonconstant);
> +                 }
> +             }
> +         }
> +       free (body);
>       }
> !       set_hint_predicate (&inline_summary (node)->loop_iterations, 
> loop_iterations);
> !       set_hint_predicate (&inline_summary (node)->loop_stride, loop_stride);
>         scev_finalize ();
>       }
>     inline_summary (node)->self_time = time;

Well, I know it's not that important, but some lines are clearly wider
than 80 characters, which makes them more difficult to read for people
like me.  In fact, the already committed loop_iterations computation
also often exceeds the limit.

I have also been wondering whether we really want to AND together all
the different will_be_nonconstant predicates to produce the final hint
predicates.  IIUC, we won't get the hint unless all loop
iterations/strides are known.  The loop iterations predicate in the
fairly simple pr48636.f90 is:

  loop iterations:(op0[ref offset: 192] changed || op0[ref offset: 256] changed 
|| op0[ref offset: 224] changed) && (op0[ref offset: 96] changed || op0[ref 
offset: 160] changed || op0[ref offset: 128] changed)

With predicates like these, it's going to be very difficult for IPA-CP
to put together a combination of known values and aggregate contents
for just some contexts so that we hit the sweet spot.  The current
approach of trying one known context-specific value at a time
certainly will not work in real cases; at the same time, it is really
built around independent assessments of the values...

Thanks,

Martin


> *************** estimate_node_size_and_time (struct cgra
> *** 2715,2720 ****
> --- 2796,2804 ----
>     if (info->loop_iterations
>         && !evaluate_predicate (info->loop_iterations, possible_truths))
>       hints |=INLINE_HINT_loop_iterations;
> +   if (info->loop_stride
> +       && !evaluate_predicate (info->loop_stride, possible_truths))
> +     hints |=INLINE_HINT_loop_stride;
>   
>     if (time > MAX_TIME * INLINE_TIME_SCALE)
>       time = MAX_TIME * INLINE_TIME_SCALE;
> *************** remap_edge_summaries  (struct cgraph_edg
> *** 3011,3016 ****
> --- 3095,3131 ----
>       }
>   }
>   
> + /* Same as remap_predicate, but set result into hint *HINT.  */
> + 
> + static void
> + remap_hint_predicate (struct inline_summary *info,
> +                   struct inline_summary *callee_info,
> +                   struct predicate **hint,
> +                   VEC (int, heap) *operand_map,
> +                   VEC (int, heap) *offset_map,
> +                   clause_t possible_truths,
> +                   struct predicate *toplev_predicate)
> + {
> +   predicate p;
> + 
> +   if (!*hint)
> +     return;
> +   p = remap_predicate (info, callee_info,
> +                    *hint,
> +                    operand_map, offset_map,
> +                    possible_truths,
> +                    toplev_predicate);
> +   if (!false_predicate_p (&p)
> +       && !true_predicate_p (&p))
> +     {
> +       if (!*hint)
> +     set_hint_predicate (hint, p);
> +       else
> +     **hint = and_predicates (info->conds, 
> +                              *hint,
> +                              &p);
> +     }
> + }
>   
>   /* We inlined EDGE.  Update summary of the function we inlined into.  */
>   
> *************** inline_merge_summary (struct cgraph_edge
> *** 3102,3129 ****
>       }
>     remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
>                       offset_map, clause, &toplev_predicate);
> !   if (callee_info->loop_iterations)
> !     {
> !       predicate p = remap_predicate (info, callee_info,
> !                                  callee_info->loop_iterations,
> !                                  operand_map, offset_map,
> !                                  clause,
> !                                  &toplev_predicate);
> !       if (!false_predicate_p (&p)
> !       && !true_predicate_p (&p))
> !     {
> !       if (!info->loop_iterations)
> !         {
> !           info->loop_iterations
> !              = (struct predicate *)pool_alloc (edge_predicate_pool);
> !           *info->loop_iterations = p;
> !         }
> !       else
> !         *info->loop_iterations = and_predicates (info->conds, 
> !                                                  info->loop_iterations,
> !                                                  &p);
> !     }
> !     }
>   
>     inline_update_callee_summaries (edge->callee,
>                                 inline_edge_summary (edge)->loop_depth);
> --- 3217,3230 ----
>       }
>     remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
>                       offset_map, clause, &toplev_predicate);
> !   remap_hint_predicate (info, callee_info,
> !                     &callee_info->loop_iterations,
> !                     operand_map, offset_map,
> !                     clause, &toplev_predicate);
> !   remap_hint_predicate (info, callee_info,
> !                     &callee_info->loop_stride,
> !                     operand_map, offset_map,
> !                     clause, &toplev_predicate);
>   
>     inline_update_callee_summaries (edge->callee,
>                                 inline_edge_summary (edge)->loop_depth);
> *************** inline_read_section (struct lto_file_dec
> *** 3595,3605 ****
>       }
>        
>         p = read_predicate (&ib);
> !       if (!true_predicate_p (&p))
> !     {
> !       info->loop_iterations = (struct predicate *)pool_alloc 
> (edge_predicate_pool);
> !       *info->loop_iterations = p;
> !     }
>         for (e = node->callees; e; e = e->next_callee)
>       read_inline_edge_summary (&ib, e);
>         for (e = node->indirect_calls; e; e = e->next_callee)
> --- 3696,3704 ----
>       }
>        
>         p = read_predicate (&ib);
> !       set_hint_predicate (&info->loop_iterations, p);
> !       p = read_predicate (&ib);
> !       set_hint_predicate (&info->loop_stride, p);
>         for (e = node->callees; e; e = e->next_callee)
>       read_inline_edge_summary (&ib, e);
>         for (e = node->indirect_calls; e; e = e->next_callee)
> *************** inline_write_summary (void)
> *** 3747,3752 ****
> --- 3846,3852 ----
>             write_predicate (ob, &e->predicate);
>           }
>         write_predicate (ob, info->loop_iterations);
> +       write_predicate (ob, info->loop_stride);
>         for (edge = node->callees; edge; edge = edge->next_callee)
>           write_inline_edge_summary (ob, edge);
>         for (edge = node->indirect_calls; edge; edge = edge->next_callee)
> Index: ipa-inline.c
> ===================================================================
> *** ipa-inline.c      (revision 191228)
> --- ipa-inline.c      (working copy)
> *************** want_inline_small_function_p (struct cgr
> *** 481,487 ****
>         else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
>              && growth >= MAX_INLINE_INSNS_SINGLE
>              && !(hints & (INLINE_HINT_indirect_call
> !                          | INLINE_HINT_loop_iterations)))
>       {
>             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
>         want_inline = false;
> --- 481,488 ----
>         else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
>              && growth >= MAX_INLINE_INSNS_SINGLE
>              && !(hints & (INLINE_HINT_indirect_call
> !                          | INLINE_HINT_loop_iterations
> !                          | INLINE_HINT_loop_stride)))
>       {
>             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
>         want_inline = false;
> *************** want_inline_small_function_p (struct cgr
> *** 533,539 ****
>        inlining given function is very profitable.  */
>         else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
>              && growth >= ((hints & (INLINE_HINT_indirect_call
> !                                    | INLINE_HINT_loop_iterations))
>                            ? MAX (MAX_INLINE_INSNS_AUTO,
>                                   MAX_INLINE_INSNS_SINGLE)
>                            : MAX_INLINE_INSNS_AUTO))
> --- 534,541 ----
>        inlining given function is very profitable.  */
>         else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
>              && growth >= ((hints & (INLINE_HINT_indirect_call
> !                                    | INLINE_HINT_loop_iterations
> !                                    | INLINE_HINT_loop_stride))
>                            ? MAX (MAX_INLINE_INSNS_AUTO,
>                                   MAX_INLINE_INSNS_SINGLE)
>                            : MAX_INLINE_INSNS_AUTO))
> *************** edge_badness (struct cgraph_edge *edge,
> *** 866,872 ****
>           fprintf (dump_file, "Badness overflow\n");
>       }
>         if (hints & (INLINE_HINT_indirect_call
> !                | INLINE_HINT_loop_iterations))
>       badness /= 8;
>         if (dump)
>       {
> --- 868,875 ----
>           fprintf (dump_file, "Badness overflow\n");
>       }
>         if (hints & (INLINE_HINT_indirect_call
> !                | INLINE_HINT_loop_iterations
> !                | INLINE_HINT_loop_stride))
>       badness /= 8;
>         if (dump)
>       {
> Index: ipa-inline.h
> ===================================================================
> *** ipa-inline.h      (revision 191228)
> --- ipa-inline.h      (working copy)
> *************** typedef struct GTY(()) condition
> *** 46,52 ****
>      They are represtented as bitmap of the following values.  */
>   enum inline_hints_vals {
>     INLINE_HINT_indirect_call = 1,
> !   INLINE_HINT_loop_iterations = 2
>   };
>   typedef int inline_hints;
>   
> --- 46,53 ----
>      They are represtented as bitmap of the following values.  */
>   enum inline_hints_vals {
>     INLINE_HINT_indirect_call = 1,
> !   INLINE_HINT_loop_iterations = 2,
> !   INLINE_HINT_loop_stride = 4
>   };
>   typedef int inline_hints;
>   
> *************** struct GTY(()) inline_summary
> *** 120,128 ****
>     conditions conds;
>     VEC(size_time_entry,gc) *entry;
>   
> !   /* Predicate on when some loop in the function sbecomes to have known
>        bounds.   */
>     struct predicate * GTY((skip)) loop_iterations;
>   };
>   
>   
> --- 121,132 ----
>     conditions conds;
>     VEC(size_time_entry,gc) *entry;
>   
> !   /* Predicate on when some loop in the function becomes to have known
>        bounds.   */
>     struct predicate * GTY((skip)) loop_iterations;
> +   /* Predicate on when some loop in the function becomes to have known
> +      stride.   */
> +   struct predicate * GTY((skip)) loop_stride;
>   };
>   
>   
> Index: testsuite/gcc.dg/ipa/inlinehint-2.c
> ===================================================================
> *** testsuite/gcc.dg/ipa/inlinehint-2.c       (revision 0)
> --- testsuite/gcc.dg/ipa/inlinehint-2.c       (working copy)
> ***************
> *** 0 ****
> --- 1,13 ----
> + /* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining 
> -fno-ipa-cp"  } */
> + t(int s, void **p)
> + {
> +   int i;
> +   for (i;i<10000;i+=s)
> +     p[i]=0;
> + }
> + m(void **p)
> + {
> +   t (10);
> + }
> + /* { dg-final { scan-ipa-dump "loop_stride"  "inline"  } } */
> + /* { dg-final { cleanup-ipa-dump "inline" } } */

Re: Loop stride optimization hint

Reply via email to