On Fri, 20 Mar 2020, Martin Jambor wrote:

> Hi,
> 
> PR 93435 is a perfect SRA bomb.  It initializes an array of 16 chars
> element-wise, then uses that to initialize an aggregate that consists
> of four such arrays, that one to initialize one four times as big as
> the previous one all the way to an aggregate that has 64kb.
> 
> This causes the sub-access propagation across assignments to create
> thousands of byte-sized artificial accesses which are then eligible to
> be replaced - they do facilitate forward propagation but there is
> enough of them for DSE to never finish.
> 
> This patch avoids that situation by accounting how many of such
> replacements can be created per SRA candidate.  The default value of
> 32 was just the largest power of two that did not slow down
> compilation of the testcase, but it should also hopefully be big
> enough for any reasonable input that might rely on the optimization.
> 
> Bootstrapped and tested on x86_64, OK for trunk?  I'll prepare
> backport patches as a follow-up.

OK.

Thanks,
Richard.

> Thanks,
> 
> Martin
> 
> 2020-03-19  Martin Jambor  <mjam...@suse.cz>
> 
>       PR tree-optimization/93435
>       * params.opt (sra-max-propagations): New parameter.
>       * tree-sra.c (propagation_budget): New variable.
>       (budget_for_propagation_access): New function.
>       (propagate_subaccesses_from_rhs): Use it.
>       (propagate_subaccesses_from_lhs): Likewise.
>       (propagate_all_subaccesses): Set up and destroy propagation_budget.
> 
>       testsuite/
>       * gcc.dg/tree-ssa/pr93435.c: New test.
> ---
>  gcc/ChangeLog                           |  10 ++
>  gcc/params.opt                          |   4 +
>  gcc/testsuite/ChangeLog                 |   5 +
>  gcc/testsuite/gcc.dg/tree-ssa/pr93435.c | 159 ++++++++++++++++++++++++
>  gcc/tree-sra.c                          |  37 +++++-
>  5 files changed, 213 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93435.c
> 
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 81582dd4f8c..25392dcc0bf 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,13 @@
> +2020-03-19  Martin Jambor  <mjam...@suse.cz>
> +
> +     PR tree-optimization/93435
> +     * params.opt (sra-max-propagations): New parameter.
> +     * tree-sra.c (propagation_budget): New variable.
> +     (budget_for_propagation_access): New function.
> +     (propagate_subaccesses_from_rhs): Use it.
> +     (propagate_subaccesses_from_lhs): Likewise.
> +     (propagate_all_subaccesses): Set up and destroy propagation_budget.
> +
>  2020-03-16  Jakub Jelinek  <ja...@redhat.com>
>  
>       PR debug/94167
> diff --git a/gcc/params.opt b/gcc/params.opt
> index e39216aa7d0..1e484c3e0f5 100644
> --- a/gcc/params.opt
> +++ b/gcc/params.opt
> @@ -844,6 +844,10 @@ Maximum size, in storage units, of an aggregate which 
> should be considered for s
>  Common Joined UInteger Var(param_sra_max_scalarization_size_speed) Param 
> Optimization
>  Maximum size, in storage units, of an aggregate which should be considered 
> for scalarization when compiling for speed.
>  
> +-param=sra-max-propagations=
> +Common Joined UInteger Var(param_sra_max_propagations) Param Optimization 
> Init(32)
> +Maximum number of artificial accesses to enable forward propagation that 
> Scalar Replacement of Aggregates will keep for one local variable.
> +
>  -param=ssa-name-def-chain-limit=
>  Common Joined UInteger Var(param_ssa_name_def_chain_limit) Init(512) Param 
> Optimization
>  The maximum number of SSA_NAME assignments to follow in determining a value.
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 0f87a04d100..a4a3dc06bbf 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,8 @@
> +2020-03-19  Martin Jambor  <mjam...@suse.cz>
> +
> +     PR tree-optimization/93435
> +     * gcc.dg/tree-ssa/pr93435.c: New test.
> +
>  2020-03-16  Iain Buclaw  <ibuc...@gdcproject.org>
>  
>       * gdc.dg/asm1.d: Add new test for ICE in asm parser.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c
> new file mode 100644
> index 00000000000..cb8e7495b15
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c
> @@ -0,0 +1,159 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +typedef signed char int8_T;
> +typedef int int32_T;
> +
> +typedef struct {
> +  int8_T a;
> +} struct0_T;
> +
> +typedef struct {
> +  struct0_T f10[4];
> +} struct_T;
> +
> +typedef struct {
> +  struct_T f9[4];
> +} b_struct_T;
> +
> +typedef struct {
> +  b_struct_T f8[4];
> +} c_struct_T;
> +
> +typedef struct {
> +  c_struct_T f7[4];
> +} d_struct_T;
> +
> +typedef struct {
> +  d_struct_T f6[4];
> +} e_struct_T;
> +
> +typedef struct {
> +  e_struct_T f5[4];
> +} f_struct_T;
> +
> +typedef struct {
> +  f_struct_T f4[4];
> +} g_struct_T;
> +
> +typedef struct {
> +  g_struct_T f3[4];
> +} h_struct_T;
> +
> +typedef struct {
> +  h_struct_T f2[4];
> +} i_struct_T;
> +
> +typedef struct {
> +  i_struct_T f1[4];
> +} j_struct_T;
> +
> +typedef struct {
> +  struct {
> +    j_struct_T ds21[4];
> +    i_struct_T ds20[4];
> +    i_struct_T r9;
> +  } f0;
> +} deep_struct_arraysStackData;
> +
> +/* Function Definitions */
> +void deep_struct_arrays(deep_struct_arraysStackData *SD,
> +  int8_T in1, int8_T inCount, int8_T *out1, int8_T *out2, struct0_T out3[4])
> +{
> +  struct0_T r;
> +  struct_T r1;
> +  b_struct_T r2;
> +  c_struct_T r3;
> +  d_struct_T r4;
> +  e_struct_T r5;
> +  f_struct_T r6;
> +  g_struct_T r7;
> +  h_struct_T r8;
> +  int32_T count;
> +  int32_T i;
> +
> +  /*  Check properties of input in1 */
> +  /*  Check properties of input inCount */
> +  /*  Copyright 2006 The MathWorks, Inc. */
> +  r.a = in1;
> +  r1.f10[0] = r;
> +  r1.f10[1] = r;
> +  r1.f10[2] = r;
> +  r1.f10[3] = r;
> +  r2.f9[0] = r1;
> +  r2.f9[1] = r1;
> +  r2.f9[2] = r1;
> +  r2.f9[3] = r1;
> +  r3.f8[0] = r2;
> +  r3.f8[1] = r2;
> +  r3.f8[2] = r2;
> +  r3.f8[3] = r2;
> +  r4.f7[0] = r3;
> +  r4.f7[1] = r3;
> +  r4.f7[2] = r3;
> +  r4.f7[3] = r3;
> +  r5.f6[0] = r4;
> +  r5.f6[1] = r4;
> +  r5.f6[2] = r4;
> +  r5.f6[3] = r4;
> +  r6.f5[0] = r5;
> +  r6.f5[1] = r5;
> +  r6.f5[2] = r5;
> +  r6.f5[3] = r5;
> +  r7.f4[0] = r6;
> +  r7.f4[1] = r6;
> +  r7.f4[2] = r6;
> +  r7.f4[3] = r6;
> +  r8.f3[0] = r7;
> +  r8.f3[1] = r7;
> +  r8.f3[2] = r7;
> +  r8.f3[3] = r7;
> +  SD->f0.r9.f2[0] = r8;
> +  SD->f0.r9.f2[1] = r8;
> +  SD->f0.r9.f2[2] = r8;
> +  SD->f0.r9.f2[3] = r8;
> +  SD->f0.ds20[0] = SD->f0.r9;
> +  SD->f0.ds20[3] = SD->f0.r9;
> +  count = 0;
> +  while (count < inCount) {
> +    i = in1 + SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0]
> +      .f10[0].a;
> +    if (i > 127) {
> +      i = 127;
> +    } else {
> +      if (i < -128) {
> +        i = -128;
> +      }
> +    }
> +
> +    SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0].a =
> +      (int8_T)i;
> +    i = 
> SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a
> +      + 3;
> +    if (i > 127) {
> +      i = 127;
> +    }
> +
> +    SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a =
> +      (int8_T)i;
> +    count++;
> +  }
> +
> +  if (inCount > 10) {
> +    
> SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].
> +      a = 14;
> +  } else {
> +    
> SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].
> +      a = 16;
> +  }
> +
> +  *out1 = 
> SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0].
> +    a;
> +  *out2 = 
> SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].
> +    a;
> +  out3[0] = r;
> +  out3[1] = r;
> +  out3[2] = r;
> +  out3[3] = 
> SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3]
> +    .f10[3];
> +}
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 5561ea6f655..8918c3d6420 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -285,6 +285,9 @@ static object_allocator<assign_link> assign_link_pool 
> ("SRA links");
>  /* Base (tree) -> Vector (vec<access_p> *) map.  */
>  static hash_map<tree, auto_vec<access_p> > *base_access_vec;
>  
> +/* Hash to limit creation of artificial accesses */
> +static hash_map<tree, unsigned> *propagation_budget;
> +
>  /* Candidate hash table helpers.  */
>  
>  struct uid_decl_hasher : nofree_ptr_hash <tree_node>
> @@ -2687,6 +2690,32 @@ subtree_mark_written_and_rhs_enqueue (struct access 
> *access)
>      subtree_mark_written_and_rhs_enqueue (child);
>  }
>  
> +/* If there is still budget to create a propagation access for DECL, return
> +   true and decrement the budget.  Otherwise return false.  */
> +
> +static bool
> +budget_for_propagation_access (tree decl)
> +{
> +  unsigned b, *p = propagation_budget->get (decl);
> +  if (p)
> +    b = *p;
> +  else
> +    b = param_sra_max_propagations;
> +
> +  if (b == 0)
> +    return false;
> +  b--;
> +
> +  if (b == 0 && dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "The propagation budget of ");
> +      print_generic_expr (dump_file, decl);
> +      fprintf (dump_file, " (UID: %u) has been exhausted.\n", DECL_UID 
> (decl));
> +    }
> +  propagation_budget->put (decl, b);
> +  return true;
> +}
> +
>  /* Propagate subaccesses and grp_write flags of RACC across an assignment 
> link
>     to LACC.  Enqueue sub-accesses as necessary so that the write flag is
>     propagated transitively.  Return true if anything changed.  Additionally, 
> if
> @@ -2791,7 +2820,8 @@ propagate_subaccesses_from_rhs (struct access *lacc, 
> struct access *racc)
>         continue;
>       }
>  
> -      if (rchild->grp_unscalarizable_region)
> +      if (rchild->grp_unscalarizable_region
> +       || !budget_for_propagation_access (lacc->base))
>       {
>         if (rchild->grp_write && !lacc->grp_write)
>           {
> @@ -2851,7 +2881,8 @@ propagate_subaccesses_from_lhs (struct access *lacc, 
> struct access *racc)
>  
>        if (lchild->grp_unscalarizable_region
>         || child_would_conflict_in_acc (racc, norm_offset, lchild->size,
> -                                       &matching_acc))
> +                                       &matching_acc)
> +       || !budget_for_propagation_access (racc->base))
>       {
>         if (matching_acc
>             && propagate_subaccesses_from_lhs (lchild, matching_acc))
> @@ -2882,6 +2913,7 @@ propagate_subaccesses_from_lhs (struct access *lacc, 
> struct access *racc)
>  static void
>  propagate_all_subaccesses (void)
>  {
> +  propagation_budget = new hash_map<tree, unsigned>;
>    while (rhs_work_queue_head)
>      {
>        struct access *racc = pop_access_from_rhs_work_queue ();
> @@ -2945,6 +2977,7 @@ propagate_all_subaccesses (void)
>           add_access_to_lhs_work_queue (racc);
>       }
>      }
> +  delete propagation_budget;
>  }
>  
>  /* Return true if the forest beginning with ROOT does not contain
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to