On Tue, Jan 10, 2012 at 2:43 PM, William J. Schmidt
<wschm...@linux.vnet.ibm.com> wrote:
> Greetings,
>
> This patch follows Richard Guenther's suggestion of 2011-07-05 in
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49642 to fix the problem in
> gcc 4.6.  It prevents choosing a function split point that is dominated
> by a builtin call to __builtin_constant_p.
>
> The bug was marked fixed in 4.7 since the extra FRE pass allows the
> correct optimization to be done even in the presence of
> __builtin_constant_p.  However, 4.7 still fails in the presence of
> -fno-tree-fre.  I think we should probably include a variation of this
> patch in 4.7 that only kicks in when FRE has been disabled at the
> command line.  The test case would also be modified slightly to include
> -fno-tree-fre in the dg-compile statement.  Thoughts?

I think it should be unconditionally restrict splitting (I suppose on the
trunk the __builtin_constant_p is optimized away already).

Btw, this will also disqualify any point below

 if (__builtin_constant_p (...))
   {
     ...
   }

because after the if join all BBs are dominated by the __builtin_constant_p
call.  What we want to disallow is splitting at a block that is dominated
by the true edge of the condition fed by the __builtin_constant_p result ...

Honza?

> The 4.6 patch was bootstrapped and tests cleanly on powerpc64-linux-gnu.
> OK for 4.6 branch?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2012-01-10  Bill Schmidt  <wschm...@linux.vnet.ibm.com>
>
>        PR tree-optimization/49642
>        * ipa-split.c (forbidden_dominators): New variable.
>        (check_forbidden_calls): New function.
>        (dominated_by_forbidden): Likewise.
>        (consider_split): Check for forbidden calls.
>        (execute_split_functions): Initialize and free forbidden
>        dominators info; call check_forbidden_calls.
>
> gcc/testsuite:
>
> 2012-01-10  Bill Schmidt  <wschm...@linux.vnet.ibm.com>
>
>        PR tree-optimization/49642
>        * gcc.dg/tree-ssa/pr49642.c: New test.
>
>
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr49642.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> @@ -0,0 +1,49 @@
> +/* Verify that ipa-split is disabled following __builtin_constant_p.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef unsigned int u32;
> +typedef unsigned long long u64;
> +
> +static inline __attribute__((always_inline)) __attribute__((const))
> +int __ilog2_u32(u32 n)
> +{
> + int bit;
> + asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
> + return 31 - bit;
> +}
> +
> +
> +static inline __attribute__((always_inline)) __attribute__((const))
> +int __ilog2_u64(u64 n)
> +{
> + int bit;
> + asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
> + return 63 - bit;
> +}
> +
> +
> +
> +static u64 ehca_map_vaddr(void *caddr);
> +
> +struct ehca_shca {
> +        u32 hca_cap_mr_pgsize;
> +};
> +
> +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
> +{
> + return 1UL << ( __builtin_constant_p(shca->hca_cap_mr_pgsize) ? ( 
> (shca->hca_cap_mr_pgsize) < 1 ? ____ilog2_NaN() : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 63) ? 63 : (shca->hca_cap_mr_pgsize) & (1ULL << 62) ? 62 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 61) ? 61 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 60) ? 60 : (shca->hca_cap_mr_pgsize) & (1ULL << 59) ? 59 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 58) ? 58 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 57) ? 57 : (shca->hca_cap_mr_pgsize) & (1ULL << 56) ? 56 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 55) ? 55 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 54) ? 54 : (shca->hca_cap_mr_pgsize) & (1ULL << 53) ? 53 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 52) ? 52 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 51) ? 51 : (shca->hca_cap_mr_pgsize) & (1ULL << 50) ? 50 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 49) ? 49 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 48) ? 48 : (shca->hca_cap_mr_pgsize) & (1ULL << 47) ? 47 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 46) ? 46 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 45) ? 45 : (shca->hca_cap_mr_pgsize) & (1ULL << 44) ? 44 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 43) ? 43 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 42) ? 42 : (shca->hca_cap_mr_pgsize) & (1ULL << 41) ? 41 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 40) ? 40 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 39) ? 39 : (shca->hca_cap_mr_pgsize) & (1ULL << 38) ? 38 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 37) ? 37 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 36) ? 36 : (shca->hca_cap_mr_pgsize) & (1ULL << 35) ? 35 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 34) ? 34 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 33) ? 33 : (shca->hca_cap_mr_pgsize) & (1ULL << 32) ? 32 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 31) ? 31 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 30) ? 30 : (shca->hca_cap_mr_pgsize) & (1ULL << 29) ? 29 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 28) ? 28 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 27) ? 27 : (shca->hca_cap_mr_pgsize) & (1ULL << 26) ? 26 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 25) ? 25 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 24) ? 24 : (shca->hca_cap_mr_pgsize) & (1ULL << 23) ? 23 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 22) ? 22 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 21) ? 21 : (shca->hca_cap_mr_pgsize) & (1ULL << 20) ? 20 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 19) ? 19 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 18) ? 18 : (shca->hca_cap_mr_pgsize) & (1ULL << 17) ? 17 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 16) ? 16 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 15) ? 15 : (shca->hca_cap_mr_pgsize) & (1ULL << 14) ? 14 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 13) ? 13 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 12) ? 12 : (shca->hca_cap_mr_pgsize) & (1ULL << 11) ? 11 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 10) ? 10 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 9) ? 9 : (shca->hca_cap_mr_pgsize) & (1ULL << 8) ? 8 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 7) ? 7 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 6) ? 6 : (shca->hca_cap_mr_pgsize) & (1ULL << 5) ? 5 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 4) ? 4 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 3) ? 3 : (shca->hca_cap_mr_pgsize) & (1ULL << 2) ? 2 : 
> (shca->hca_cap_mr_pgsize) & (1ULL << 1) ? 1 : (shca->hca_cap_mr_pgsize) & 
> (1ULL << 0) ? 0 : ____ilog2_NaN() ) : (sizeof(shca->hca_cap_mr_pgsize) <= 4) 
> ? __ilog2_u32(shca->hca_cap_mr_pgsize) : __ilog2_u64(shca->hca_cap_mr_pgsize) 
> );
> +}
> +
> +int x(struct ehca_shca *shca) {
> +        return ehca_get_max_hwpage_size(shca);
> +}
> +
> +int y(struct ehca_shca *shca)
> +{
> +        return ehca_get_max_hwpage_size(shca);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "____ilog2_NaN" 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/ipa-split.c
> ===================================================================
> --- gcc/ipa-split.c     (revision 183033)
> +++ gcc/ipa-split.c     (working copy)
> @@ -130,6 +130,10 @@ struct split_point
>
>  struct split_point best_split_point;
>
> +/* Set of basic blocks that are not allowed to dominate a split point.  */
> +
> +bitmap forbidden_dominators;
> +
>  static tree find_retval (basic_block return_bb);
>
>  /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
> @@ -270,6 +274,49 @@ done:
>   return ok;
>  }
>
> +/* If STMT is a call, check the callee against a list of forbidden
> +   functions.  If a match is found, set the bit for block BB in the
> +   forbidden dominators bitmap.  The purpose of this is to avoid
> +   selecting a split point where we are likely to lose the chance
> +   to optimize away an unused function call.  */
> +
> +static void
> +check_forbidden_calls (gimple stmt, basic_block bb)
> +{
> +  tree fndecl;
> +
> +  if (!is_gimple_call (stmt))
> +    return;
> +
> +  fndecl = gimple_call_fndecl (stmt);
> +
> +  if (fndecl
> +      && TREE_CODE (fndecl) == FUNCTION_DECL
> +      && DECL_BUILT_IN (fndecl)
> +      /* At the moment, __builtin_constant_p is the only forbidden
> +        function call (see PR49642).  */
> +      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
> +    bitmap_set_bit (forbidden_dominators, bb->index);
> +}
> +
> +/* If BB is dominated by any block in the forbidden dominators set,
> +   return TRUE; else FALSE.  */
> +
> +static bool
> +dominated_by_forbidden (basic_block bb)
> +{
> +  unsigned dom_bb;
> +  bitmap_iterator bi;
> +
> +  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
> +    {
> +      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
> +       return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* We found an split_point CURRENT.  NON_SSA_VARS is bitmap of all non ssa
>    variables used and RETURN_BB is return basic block.
>    See if we can split function here.  */
> @@ -411,6 +458,18 @@ consider_split (struct split_point *current, bitma
>                 "  Refused: split part has non-ssa uses\n");
>       return;
>     }
> +
> +  /* If the split point is dominated by a forbidden function call,
> +     reject the split.  */
> +  if (!bitmap_empty_p (forbidden_dominators)
> +      && dominated_by_forbidden (current->entry_bb))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file,
> +                "  Refused: split point dominated by forbidden call\n");
> +      return;
> +    }
> +
>   /* See if retval used by return bb is computed by header or split part.
>      When it is computed by split part, we need to produce return statement
>      in the split part and add code to header to pass it around.
> @@ -1329,6 +1388,10 @@ execute_split_functions (void)
>       return 0;
>     }
>
> +  /* Initialize bitmap to track forbidden calls.  */
> +  forbidden_dominators = BITMAP_ALLOC (NULL);
> +  calculate_dominance_info (CDI_DOMINATORS);
> +
>   /* Compute local info about basic blocks and determine function size/time.  
> */
>   VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
>   memset (&best_split_point, 0, sizeof (best_split_point));
> @@ -1350,6 +1413,7 @@ execute_split_functions (void)
>          this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
>          size += this_size;
>          time += this_time;
> +         check_forbidden_calls (stmt, bb);
>
>          if (dump_file && (dump_flags & TDF_DETAILS))
>            {
> @@ -1371,6 +1435,7 @@ execute_split_functions (void)
>       BITMAP_FREE (best_split_point.split_bbs);
>       todo = TODO_update_ssa | TODO_cleanup_cfg;
>     }
> +  BITMAP_FREE (forbidden_dominators);
>   VEC_free (bb_info, heap, bb_info_vec);
>   bb_info_vec = NULL;
>   return todo;
>
>
>

Reply via email to