On Fri, 24 Jun 2016, Jakub Jelinek wrote:

> On Thu, Jun 23, 2016 at 05:23:21PM +0200, Jakub Jelinek wrote:
> > Thinking about this again, there could be another option - keep
> > __atomic_compare_exchange_N in the IL, but under certain conditions (similar
> > to what the patch uses in fold_builtin_atomic_compare_exchange) for these
> > builtins ignore &var on the second argument, and if we actually turn var
> > into non-addressable, convert the builtin call similarly to what
> > fold_builtin_atomic_compare_exchange does in the patch (except the store
> > would be non-conditional then; the gimple-fold.c part wouldn't be needed
> > then).
> 
> Here is this second approach implemented.  Generally it gives slight code
> size savings over the other patch on the testcases I've posted (and even the
> other patch used to produce generally smaller code than vanilla).
> 
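> For illustration only (not part of the patch; the function and variable
> names below are made up), the kind of source this targets is roughly:
> 
>   #include <stdbool.h>
> 
>   bool
>   my_cas (int *p, int d)
>   {
>     int e = 0;   /* Only addressable because of the &e below.  */
>     return __atomic_compare_exchange_n (p, &e, d, false,
>                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
>   }
> 
> Once &e no longer forces e to be marked addressable, the
> update-addresses-taken pass can rewrite e into SSA form and replace the
> builtin with the internal ATOMIC_COMPARE_EXCHANGE call, reading the
> updated value of e back out of the REALPART_EXPR of its complex result.
> 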
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2016-06-24  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR middle-end/66867
>       * builtins.c (expand_ifn_atomic_compare_exchange_into_call,
>       expand_ifn_atomic_compare_exchange): New functions.
>       * internal-fn.c (expand_ATOMIC_COMPARE_EXCHANGE): New function.
>       * tree.h (build_call_expr_internal_loc): Rename to ...
>       (build_call_expr_internal_loc_array): ... this.  Fix up type of
>       last argument.
>       * internal-fn.def (ATOMIC_COMPARE_EXCHANGE): New internal fn.
>       * predict.c (expr_expected_value_1): Handle IMAGPART_EXPR of
>       ATOMIC_COMPARE_EXCHANGE result.
>       * builtins.h (expand_ifn_atomic_compare_exchange): New prototype.
>       * gimple-fold.h (optimize_atomic_compare_exchange_p,
>       fold_builtin_atomic_compare_exchange): New prototypes.
>       * gimple-fold.c (optimize_atomic_compare_exchange_p,
>       fold_builtin_atomic_compare_exchange): New functions.
>       * tree-ssa.c (execute_update_addresses_taken): If
>       optimize_atomic_compare_exchange_p, ignore &var in 2nd argument
>       of call when finding addressable vars, and if such var becomes
>       non-addressable, call fold_builtin_atomic_compare_exchange.
> 
> --- gcc/builtins.c.jj 2016-06-08 21:01:25.000000000 +0200
> +++ gcc/builtins.c    2016-06-24 11:18:03.839096035 +0200
> @@ -5158,6 +5158,123 @@ expand_builtin_atomic_compare_exchange (
>    return target;
>  }
>  
> +/* Helper function for expand_ifn_atomic_compare_exchange - expand
> +   internal ATOMIC_COMPARE_EXCHANGE call into __atomic_compare_exchange_N
> +   call.  The weak parameter must be dropped to match the expected parameter
> +   list and the expected argument changed from value to pointer to memory
> +   slot.  */
> +
> +static void
> +expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
> +{
> +  unsigned int z;
> +  vec<tree, va_gc> *vec;
> +
> +  vec_alloc (vec, 5);
> +  vec->quick_push (gimple_call_arg (call, 0));
> +  tree expected = gimple_call_arg (call, 1);
> +  rtx x = assign_stack_temp_for_type (mode, GET_MODE_SIZE (mode),
> +                                   TREE_TYPE (expected));
> +  rtx expd = expand_expr (expected, x, mode, EXPAND_NORMAL);
> +  if (expd != x)
> +    emit_move_insn (x, expd);
> +  tree v = make_tree (TREE_TYPE (expected), x);
> +  vec->quick_push (build1 (ADDR_EXPR,
> +                        build_pointer_type (TREE_TYPE (expected)), v));
> +  vec->quick_push (gimple_call_arg (call, 2));
> +  /* Skip the boolean weak parameter.  */
> +  for (z = 4; z < 6; z++)
> +    vec->quick_push (gimple_call_arg (call, z));
> +  built_in_function fncode
> +    = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
> +                        + exact_log2 (GET_MODE_SIZE (mode)));
> +  tree fndecl = builtin_decl_explicit (fncode);
> +  tree fn = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
> +                 fndecl);
> +  tree exp = build_call_vec (boolean_type_node, fn, vec);
> +  tree lhs = gimple_call_lhs (call);
> +  rtx boolret = expand_call (exp, NULL_RTX, lhs == NULL_TREE);
> +  if (lhs)
> +    {
> +      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +      if (GET_MODE (boolret) != mode)
> +     boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
> +      x = force_reg (mode, x);
> +      write_complex_part (target, boolret, true);
> +      write_complex_part (target, x, false);
> +    }
> +}
> +
> +/* Expand IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */
> +
> +void
> +expand_ifn_atomic_compare_exchange (gcall *call)
> +{
> +  int size = tree_to_shwi (gimple_call_arg (call, 3)) & 255;
> +  gcc_assert (size == 1 || size == 2 || size == 4 || size == 8 || size == 16);
> +  machine_mode mode = mode_for_size (BITS_PER_UNIT * size, MODE_INT, 0);
> +  rtx expect, desired, mem, oldval, boolret;
> +  enum memmodel success, failure;
> +  tree lhs;
> +  bool is_weak;
> +  source_location loc
> +    = expansion_point_location_if_in_system_header (gimple_location (call));
> +
> +  success = get_memmodel (gimple_call_arg (call, 4));
> +  failure = get_memmodel (gimple_call_arg (call, 5));
> +
> +  if (failure > success)
> +    {
> +      warning_at (loc, OPT_Winvalid_memory_model,
> +               "failure memory model cannot be stronger than success "
> +               "memory model for %<__atomic_compare_exchange%>");
> +      success = MEMMODEL_SEQ_CST;
> +    }
> +
> +  if (is_mm_release (failure) || is_mm_acq_rel (failure))
> +    {
> +      warning_at (loc, OPT_Winvalid_memory_model,
> +               "invalid failure memory model for "
> +               "%<__atomic_compare_exchange%>");
> +      failure = MEMMODEL_SEQ_CST;
> +      success = MEMMODEL_SEQ_CST;
> +    }
> +
> +  if (!flag_inline_atomics)
> +    {
> +      expand_ifn_atomic_compare_exchange_into_call (call, mode);
> +      return;
> +    }
> +
> +  /* Expand the operands.  */
> +  mem = get_builtin_sync_mem (gimple_call_arg (call, 0), mode);
> +
> +  expect = expand_expr_force_mode (gimple_call_arg (call, 1), mode);
> +  desired = expand_expr_force_mode (gimple_call_arg (call, 2), mode);
> +
> +  is_weak = (tree_to_shwi (gimple_call_arg (call, 3)) & 256) != 0;
> +
> +  boolret = NULL;
> +  oldval = NULL;
> +
> +  if (!expand_atomic_compare_and_swap (&boolret, &oldval, mem, expect, desired,
> +                                    is_weak, success, failure))
> +    {
> +      expand_ifn_atomic_compare_exchange_into_call (call, mode);
> +      return;
> +    }
> +
> +  lhs = gimple_call_lhs (call);
> +  if (lhs)
> +    {
> +      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +      if (GET_MODE (boolret) != mode)
> +     boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
> +      write_complex_part (target, boolret, true);
> +      write_complex_part (target, oldval, false);
> +    }
> +}
> +
>  /* Expand the __atomic_load intrinsic:
>       TYPE __atomic_load (TYPE *object, enum memmodel)
>     EXP is the CALL_EXPR.
> --- gcc/internal-fn.c.jj      2016-06-15 19:09:09.000000000 +0200
> +++ gcc/internal-fn.c 2016-06-22 15:30:06.838951934 +0200
> @@ -2143,6 +2143,14 @@ expand_ATOMIC_BIT_TEST_AND_RESET (intern
>    expand_ifn_atomic_bit_test_and (call);
>  }
>  
> +/* Expand atomic compare and exchange.  */
> +
> +static void
> +expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_compare_exchange (call);
> +}
> +
>  /* Expand a call to FN using the operands in STMT.  FN has a single
>     output operand and NARGS input operands.  */
>  
> --- gcc/tree.h.jj     2016-06-20 21:16:07.000000000 +0200
> +++ gcc/tree.h        2016-06-21 17:35:19.806362408 +0200
> @@ -3985,8 +3985,8 @@ extern tree build_call_expr_loc (locatio
>  extern tree build_call_expr (tree, int, ...);
>  extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
>                                         tree, int, ...);
> -extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
> -                                       tree, int, tree *);
> +extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
> +                                             tree, int, const tree *);
>  extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
>                                      int, ...);
>  extern tree build_string_literal (int, const char *);
> --- gcc/gimple-fold.h.jj      2016-01-08 07:31:08.000000000 +0100
> +++ gcc/gimple-fold.h 2016-06-24 11:19:34.002040898 +0200
> @@ -32,6 +32,8 @@ extern tree maybe_fold_and_comparisons (
>                                       enum tree_code, tree, tree);
>  extern tree maybe_fold_or_comparisons (enum tree_code, tree, tree,
>                                      enum tree_code, tree, tree);
> +extern bool optimize_atomic_compare_exchange_p (gimple *);
> +extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *);
>  extern bool arith_overflowed_p (enum tree_code, const_tree, const_tree,
>                               const_tree);
>  extern tree no_follow_ssa_edges (tree);
> --- gcc/internal-fn.def.jj    2016-05-03 13:36:50.000000000 +0200
> +++ gcc/internal-fn.def       2016-06-21 17:10:23.516879436 +0200
> @@ -193,6 +193,7 @@ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | EC
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
> +DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
>  
>  #undef DEF_INTERNAL_INT_FN
>  #undef DEF_INTERNAL_FLT_FN
> --- gcc/predict.c.jj  2016-06-22 11:17:44.763444374 +0200
> +++ gcc/predict.c     2016-06-22 14:26:08.894724088 +0200
> @@ -1978,6 +1978,25 @@ expr_expected_value_1 (tree type, tree o
>        if (TREE_CONSTANT (op0))
>       return op0;
>  
> +      if (code == IMAGPART_EXPR)
> +     {
> +       if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
> +         {
> +           def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
> +           if (is_gimple_call (def)
> +               && gimple_call_internal_p (def)
> +               && (gimple_call_internal_fn (def)
> +                   == IFN_ATOMIC_COMPARE_EXCHANGE))
> +             {
> +               /* Assume that any given atomic operation has low contention,
> +                  and thus the compare-and-swap operation succeeds.  */
> +               if (predictor)
> +                 *predictor = PRED_COMPARE_AND_SWAP;
> +               return build_one_cst (TREE_TYPE (op0));
> +             }
> +         }
> +     }
> +
>        if (code != SSA_NAME)
>       return NULL_TREE;
>  
> --- gcc/builtins.h.jj 2016-05-03 13:36:50.000000000 +0200
> +++ gcc/builtins.h    2016-06-21 18:05:11.678635858 +0200
> @@ -72,6 +72,7 @@ extern tree std_canonical_va_list_type (
>  extern void std_expand_builtin_va_start (tree, rtx);
>  extern void expand_builtin_trap (void);
>  extern void expand_ifn_atomic_bit_test_and (gcall *);
> +extern void expand_ifn_atomic_compare_exchange (gcall *);
>  extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
>  extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
>  extern enum built_in_function builtin_mathfn_code (const_tree);
> --- gcc/gimple-fold.c.jj      2016-06-16 21:00:08.000000000 +0200
> +++ gcc/gimple-fold.c 2016-06-24 11:36:12.222916831 +0200
> @@ -2953,6 +2953,133 @@ fold_internal_goacc_dim (const gimple *c
>    return result;
>  }
>  
> +/* Return true if STMT is a __atomic_compare_exchange_N call which is suitable
> +   for conversion into ATOMIC_COMPARE_EXCHANGE if the second argument is
> +   &var where var is only addressable because of such calls.  */
> +
> +bool
> +optimize_atomic_compare_exchange_p (gimple *stmt)
> +{
> +  if (gimple_call_num_args (stmt) != 6
> +      || !flag_inline_atomics
> +      || !optimize
> +      || (flag_sanitize & (SANITIZE_THREAD | SANITIZE_ADDRESS)) != 0
> +      || !gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
> +      || !gimple_vdef (stmt)
> +      || !gimple_vuse (stmt))
> +    return false;
> +
> +  tree fndecl = gimple_call_fndecl (stmt);
> +  switch (DECL_FUNCTION_CODE (fndecl))
> +    {
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
> +      break;
> +    default:
> +      return false;
> +    }
> +
> +  tree expected = gimple_call_arg (stmt, 1);
> +  if (TREE_CODE (expected) != ADDR_EXPR
> +      || !SSA_VAR_P (TREE_OPERAND (expected, 0))
> +      || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
> +      || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
> +      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
> +    return false;
> +
> +  tree weak = gimple_call_arg (stmt, 3);
> +  if (!integer_zerop (weak) && !integer_onep (weak))
> +    return false;
> +
> +  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
> +  machine_mode mode = TYPE_MODE (itype);
> +
> +  if (direct_optab_handler (atomic_compare_and_swap_optab, mode)
> +      == CODE_FOR_nothing
> +      && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
> +    return false;
> +
> +  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      != GET_MODE_SIZE (mode))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Fold
> +     r = __atomic_compare_exchange_N (p, &e, d, w, s, f);
> +   into
> +     _Complex uintN_t t = ATOMIC_COMPARE_EXCHANGE (p, e, d, w * 256 + N, s, f);
> +     i = IMAGPART_EXPR <t>;
> +     r = (_Bool) i;
> +     e = REALPART_EXPR <t>;  */
> +
> +void
> +fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
> +{
> +  gimple *stmt = gsi_stmt (*gsi);
> +  tree fndecl = gimple_call_fndecl (stmt);
> +  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
> +  tree ctype = build_complex_type (itype);
> +  tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
> +  gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> +                                expected);
> +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  gimple_stmt_iterator gsiret = gsi_for_stmt (g);
> +  if (!useless_type_conversion_p (itype, TREE_TYPE (expected)))
> +    {
> +      g = gimple_build_assign (make_ssa_name (itype), VIEW_CONVERT_EXPR,
> +                            build1 (VIEW_CONVERT_EXPR, itype,
> +                                    gimple_assign_lhs (g)));
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +    }
> +  int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
> +          + int_size_in_bytes (itype);
> +  g = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6,
> +                               gimple_call_arg (stmt, 0),
> +                               gimple_assign_lhs (g),
> +                               gimple_call_arg (stmt, 2),
> +                               build_int_cst (integer_type_node, flag),
> +                               gimple_call_arg (stmt, 4),
> +                               gimple_call_arg (stmt, 5));
> +  tree lhs = make_ssa_name (ctype);
> +  gimple_call_set_lhs (g, lhs);
> +  gimple_set_vdef (g, gimple_vdef (stmt));
> +  gimple_set_vuse (g, gimple_vuse (stmt));
> +  SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
> +  if (gimple_call_lhs (stmt))
> +    {
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
> +                            build1 (IMAGPART_EXPR, itype, lhs));
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
> +                            gimple_assign_lhs (g));
> +    }
> +  gsi_replace (gsi, g, true);
> +  g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
> +                        build1 (REALPART_EXPR, itype, lhs));
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
> +    {
> +      g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> +                            VIEW_CONVERT_EXPR,
> +                            build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
> +                                    gimple_assign_lhs (g)));
> +      gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +    }
> +  g = gimple_build_assign (expected, SSA_NAME, gimple_assign_lhs (g));
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  *gsi = gsiret;
> +}
> +
>  /* Return true if ARG0 CODE ARG1 in infinite signed precision operation
>     doesn't fit into TYPE.  The test for overflow should be regardless of
>     -fwrapv, and even for unsigned types.  */
> --- gcc/tree-ssa.c.jj 2016-06-14 16:29:49.000000000 +0200
> +++ gcc/tree-ssa.c    2016-06-24 10:38:54.929029421 +0200
> @@ -1414,8 +1414,21 @@ execute_update_addresses_taken (void)
>         enum gimple_code code = gimple_code (stmt);
>         tree decl;
>  
> -       /* Note all addresses taken by the stmt.  */
> -       gimple_ior_addresses_taken (addresses_taken, stmt);
> +       if (code == GIMPLE_CALL
> +           && optimize_atomic_compare_exchange_p (stmt))
> +         {
> +           /* For __atomic_compare_exchange_N if the second argument
> +              is &var, don't mark var addressable;
> +              if it becomes non-addressable, we'll rewrite it into
> +              ATOMIC_COMPARE_EXCHANGE call.  */
> +           tree arg = gimple_call_arg (stmt, 1);
> +           gimple_call_set_arg (stmt, 1, null_pointer_node);
> +           gimple_ior_addresses_taken (addresses_taken, stmt);
> +           gimple_call_set_arg (stmt, 1, arg);
> +         }
> +       else
> +         /* Note all addresses taken by the stmt.  */
> +         gimple_ior_addresses_taken (addresses_taken, stmt);
>  
>         /* If we have a call or an assignment, see if the lhs contains
>            a local decl that requires not to be a gimple register.  */
> @@ -1657,6 +1670,16 @@ execute_update_addresses_taken (void)
>           else if (gimple_code (stmt) == GIMPLE_CALL)
>             {
>               unsigned i;
> +             if (optimize_atomic_compare_exchange_p (stmt))
> +               {
> +                 tree expected = gimple_call_arg (stmt, 1);
> +                 if (bitmap_bit_p (suitable_for_renaming,
> +                                   DECL_UID (TREE_OPERAND (expected, 0))))
> +                   {
> +                     fold_builtin_atomic_compare_exchange (&gsi);
> +                     continue;
> +                   }
> +               }
>               for (i = 0; i < gimple_call_num_args (stmt); ++i)
>                 {
>                   tree *argp = gimple_call_arg_ptr (stmt, i);
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
