On Fri, 24 Jun 2016, Jakub Jelinek wrote:

> On Thu, Jun 23, 2016 at 05:23:21PM +0200, Jakub Jelinek wrote:
> > Thinking about this again, there could be another option - keep
> > __atomic_compare_exchange_N in the IL, but under certain conditions (similar
> > to what the patch uses in fold_builtin_atomic_compare_exchange) for these
> > builtins ignore &var on the second argument, and if we actually turn var
> > into non-addressable, convert the builtin call similarly to what
> > fold_builtin_atomic_compare_exchange does in the patch (except the store
> > would be non-conditional then; the gimple-fold.c part wouldn't be needed
> > then).
> 
> Here is this second approach implemented.  Generally it gives slight code
> size savings over the other patch on the testcases I've posted (and even the
> other patch used to produce generally smaller code than vanilla).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.
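To make the rewrite concrete for readers of the thread, here is an
illustration of mine (not part of the patch; variable and SSA names are
invented), using a typical CAS loop:

    int cnt;

    void
    inc (void)
    {
      int e = __atomic_load_n (&cnt, __ATOMIC_RELAXED);
      while (!__atomic_compare_exchange_n (&cnt, &e, e + 1, /*weak=*/1,
                                           __ATOMIC_RELAXED, __ATOMIC_RELAXED))
        ;
    }

Today the &e argument makes e addressable, so e lives in memory for the
whole function.  With the patch, once that call is the only reason for the
addressability, the call is rewritten (see the fold comment in the
gimple-fold.c hunk below) into roughly

    t_1 = ATOMIC_COMPARE_EXCHANGE (&cnt, e_2, _3, 260, 0, 0);
    _4 = IMAGPART_EXPR <t_1>;
    r_5 = (_Bool) _4;
    e_6 = REALPART_EXPR <t_1>;

(260 = weak * 256 + size, i.e. 1 * 256 + 4), and e can be renamed into an
SSA name and kept in a register.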
> 2016-06-24  Jakub Jelinek  <ja...@redhat.com>
> 
>         PR middle-end/66867
>         * builtins.c (expand_ifn_atomic_compare_exchange_into_call,
>         expand_ifn_atomic_compare_exchange): New functions.
>         * internal-fn.c (expand_ATOMIC_COMPARE_EXCHANGE): New function.
>         * tree.h (build_call_expr_internal_loc): Rename to ...
>         (build_call_expr_internal_loc_array): ... this.  Fix up type of
>         last argument.
>         * internal-fn.def (ATOMIC_COMPARE_EXCHANGE): New internal fn.
>         * predict.c (expr_expected_value_1): Handle IMAGPART_EXPR of
>         ATOMIC_COMPARE_EXCHANGE result.
>         * builtins.h (expand_ifn_atomic_compare_exchange): New prototype.
>         * gimple-fold.h (optimize_atomic_compare_exchange_p,
>         fold_builtin_atomic_compare_exchange): New prototypes.
>         * gimple-fold.c (optimize_atomic_compare_exchange_p,
>         fold_builtin_atomic_compare_exchange): New functions.
>         * tree-ssa.c (execute_update_addresses_taken): If
>         optimize_atomic_compare_exchange_p, ignore &var in 2nd argument
>         of call when finding addressable vars, and if such var becomes
>         non-addressable, call fold_builtin_atomic_compare_exchange.
> 
> --- gcc/builtins.c.jj	2016-06-08 21:01:25.000000000 +0200
> +++ gcc/builtins.c	2016-06-24 11:18:03.839096035 +0200
> @@ -5158,6 +5158,123 @@ expand_builtin_atomic_compare_exchange (
>    return target;
>  }
>  
> +/* Helper function for expand_ifn_atomic_compare_exchange - expand
> +   internal ATOMIC_COMPARE_EXCHANGE call into __atomic_compare_exchange_N
> +   call.  The weak parameter must be dropped to match the expected parameter
> +   list and the expected argument changed from value to pointer to memory
> +   slot.  */
> +
> +static void
> +expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
> +{
> +  unsigned int z;
> +  vec<tree, va_gc> *vec;
> +
> +  vec_alloc (vec, 5);
> +  vec->quick_push (gimple_call_arg (call, 0));
> +  tree expected = gimple_call_arg (call, 1);
> +  rtx x = assign_stack_temp_for_type (mode, GET_MODE_SIZE (mode),
> +                                      TREE_TYPE (expected));
> +  rtx expd = expand_expr (expected, x, mode, EXPAND_NORMAL);
> +  if (expd != x)
> +    emit_move_insn (x, expd);
> +  tree v = make_tree (TREE_TYPE (expected), x);
> +  vec->quick_push (build1 (ADDR_EXPR,
> +                           build_pointer_type (TREE_TYPE (expected)), v));
> +  vec->quick_push (gimple_call_arg (call, 2));
> +  /* Skip the boolean weak parameter.  */
> +  for (z = 4; z < 6; z++)
> +    vec->quick_push (gimple_call_arg (call, z));
> +  built_in_function fncode
> +    = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
> +                           + exact_log2 (GET_MODE_SIZE (mode)));
> +  tree fndecl = builtin_decl_explicit (fncode);
> +  tree fn = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
> +                    fndecl);
> +  tree exp = build_call_vec (boolean_type_node, fn, vec);
> +  tree lhs = gimple_call_lhs (call);
> +  rtx boolret = expand_call (exp, NULL_RTX, lhs == NULL_TREE);
> +  if (lhs)
> +    {
> +      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +      if (GET_MODE (boolret) != mode)
> +        boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
> +      x = force_reg (mode, x);
> +      write_complex_part (target, boolret, true);
> +      write_complex_part (target, x, false);
> +    }
> +}
> +
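As a reading aid: what the helper above reconstructs is, in C-like
pseudocode (a sketch only; the real code works on RTL, and "uintN_t"
stands for the N-byte integer type),

    uintN_t slot = expected_value;   /* spill the value to a stack temp */
    _Bool ok = __atomic_compare_exchange_N (mem, &slot, desired,
                                            success, failure);
    __imag__ result = ok;            /* success flag */
    __real__ result = slot;          /* old value read back from memory */

The library entry points take no weak argument, hence the loop skipping
call argument 3.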
> +/* Expand IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */
> +
> +void
> +expand_ifn_atomic_compare_exchange (gcall *call)
> +{
> +  int size = tree_to_shwi (gimple_call_arg (call, 3)) & 255;
> +  gcc_assert (size == 1 || size == 2 || size == 4 || size == 8 || size == 16);
> +  machine_mode mode = mode_for_size (BITS_PER_UNIT * size, MODE_INT, 0);
> +  rtx expect, desired, mem, oldval, boolret;
> +  enum memmodel success, failure;
> +  tree lhs;
> +  bool is_weak;
> +  source_location loc
> +    = expansion_point_location_if_in_system_header (gimple_location (call));
> +
> +  success = get_memmodel (gimple_call_arg (call, 4));
> +  failure = get_memmodel (gimple_call_arg (call, 5));
> +
> +  if (failure > success)
> +    {
> +      warning_at (loc, OPT_Winvalid_memory_model,
> +                  "failure memory model cannot be stronger than success "
> +                  "memory model for %<__atomic_compare_exchange%>");
> +      success = MEMMODEL_SEQ_CST;
> +    }
> +
> +  if (is_mm_release (failure) || is_mm_acq_rel (failure))
> +    {
> +      warning_at (loc, OPT_Winvalid_memory_model,
> +                  "invalid failure memory model for "
> +                  "%<__atomic_compare_exchange%>");
> +      failure = MEMMODEL_SEQ_CST;
> +      success = MEMMODEL_SEQ_CST;
> +    }
> +
> +  if (!flag_inline_atomics)
> +    {
> +      expand_ifn_atomic_compare_exchange_into_call (call, mode);
> +      return;
> +    }
> +
> +  /* Expand the operands.  */
> +  mem = get_builtin_sync_mem (gimple_call_arg (call, 0), mode);
> +
> +  expect = expand_expr_force_mode (gimple_call_arg (call, 1), mode);
> +  desired = expand_expr_force_mode (gimple_call_arg (call, 2), mode);
> +
> +  is_weak = (tree_to_shwi (gimple_call_arg (call, 3)) & 256) != 0;
> +
> +  boolret = NULL;
> +  oldval = NULL;
> +
> +  if (!expand_atomic_compare_and_swap (&boolret, &oldval, mem, expect,
> +                                       desired, is_weak, success, failure))
> +    {
> +      expand_ifn_atomic_compare_exchange_into_call (call, mode);
> +      return;
> +    }
> +
> +  lhs = gimple_call_lhs (call);
> +  if (lhs)
> +    {
> +      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +      if (GET_MODE (boolret) != mode)
> +        boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
> +      write_complex_part (target, boolret, true);
> +      write_complex_part (target, oldval, false);
> +    }
> +}
> +
>  /* Expand the __atomic_load intrinsic:
>         TYPE __atomic_load (TYPE *object, enum memmodel)
>     EXP is the CALL_EXPR.
> --- gcc/internal-fn.c.jj	2016-06-15 19:09:09.000000000 +0200
> +++ gcc/internal-fn.c	2016-06-22 15:30:06.838951934 +0200
> @@ -2143,6 +2143,14 @@ expand_ATOMIC_BIT_TEST_AND_RESET (intern
>    expand_ifn_atomic_bit_test_and (call);
>  }
>  
> +/* Expand atomic compare and exchange.  */
> +
> +static void
> +expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
> +{
> +  expand_ifn_atomic_compare_exchange (call);
> +}
> +
>  /* Expand a call to FN using the operands in STMT.  FN has a single
>     output operand and NARGS input operands.  */
> 
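For reference, the two memory model checks above follow the C11 rules for
the failure ordering; e.g. (a sketch):

    int v, e = 0;
    /* OK: the failure order is not stronger than the success order.  */
    __atomic_compare_exchange_n (&v, &e, 1, 0, __ATOMIC_ACQ_REL,
                                 __ATOMIC_ACQUIRE);
    /* Warned about and demoted to SEQ_CST: RELEASE makes no sense as a
       failure order, since no store is performed on failure.  */
    __atomic_compare_exchange_n (&v, &e, 1, 0, __ATOMIC_SEQ_CST,
                                 __ATOMIC_RELEASE);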
> --- gcc/tree.h.jj	2016-06-20 21:16:07.000000000 +0200
> +++ gcc/tree.h	2016-06-21 17:35:19.806362408 +0200
> @@ -3985,8 +3985,8 @@ extern tree build_call_expr_loc (locatio
>  extern tree build_call_expr (tree, int, ...);
>  extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
>                                            tree, int, ...);
> -extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
> -                                          tree, int, tree *);
> +extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
> +                                                tree, int, const tree *);
>  extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
>                                         int, ...);
>  extern tree build_string_literal (int, const char *);
> --- gcc/gimple-fold.h.jj	2016-01-08 07:31:08.000000000 +0100
> +++ gcc/gimple-fold.h	2016-06-24 11:19:34.002040898 +0200
> @@ -32,6 +32,8 @@ extern tree maybe_fold_and_comparisons (
>                                          enum tree_code, tree, tree);
>  extern tree maybe_fold_or_comparisons (enum tree_code, tree, tree,
>                                         enum tree_code, tree, tree);
> +extern bool optimize_atomic_compare_exchange_p (gimple *);
> +extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *);
>  extern bool arith_overflowed_p (enum tree_code, const_tree, const_tree,
>                                  const_tree);
>  extern tree no_follow_ssa_edges (tree);
> --- gcc/internal-fn.def.jj	2016-05-03 13:36:50.000000000 +0200
> +++ gcc/internal-fn.def	2016-06-21 17:10:23.516879436 +0200
> @@ -193,6 +193,7 @@ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | EC
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
> +DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
>  
>  #undef DEF_INTERNAL_INT_FN
>  #undef DEF_INTERNAL_FLT_FN
> --- gcc/predict.c.jj	2016-06-22 11:17:44.763444374 +0200
> +++ gcc/predict.c	2016-06-22 14:26:08.894724088 +0200
> @@ -1978,6 +1978,25 @@ expr_expected_value_1 (tree type, tree o
>        if (TREE_CONSTANT (op0))
>          return op0;
>  
> +      if (code == IMAGPART_EXPR)
> +        {
> +          if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
> +            {
> +              def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
> +              if (is_gimple_call (def)
> +                  && gimple_call_internal_p (def)
> +                  && (gimple_call_internal_fn (def)
> +                      == IFN_ATOMIC_COMPARE_EXCHANGE))
> +                {
> +                  /* Assume that any given atomic operation has low contention,
> +                     and thus the compare-and-swap operation succeeds.  */
> +                  if (predictor)
> +                    *predictor = PRED_COMPARE_AND_SWAP;
> +                  return build_one_cst (TREE_TYPE (op0));
> +                }
> +            }
> +        }
> +
>        if (code != SSA_NAME)
>          return NULL_TREE;
>  
> --- gcc/builtins.h.jj	2016-05-03 13:36:50.000000000 +0200
> +++ gcc/builtins.h	2016-06-21 18:05:11.678635858 +0200
> @@ -72,6 +72,7 @@ extern tree std_canonical_va_list_type (
>  extern void std_expand_builtin_va_start (tree, rtx);
>  extern void expand_builtin_trap (void);
>  extern void expand_ifn_atomic_bit_test_and (gcall *);
> +extern void expand_ifn_atomic_compare_exchange (gcall *);
>  extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
>  extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
>  extern enum built_in_function builtin_mathfn_code (const_tree);
> --- gcc/gimple-fold.c.jj	2016-06-16 21:00:08.000000000 +0200
> +++ gcc/gimple-fold.c	2016-06-24 11:36:12.222916831 +0200
> @@ -2953,6 +2953,133 @@ fold_internal_goacc_dim (const gimple *c
>    return result;
>  }
>  
> +/* Return true if stmt is __atomic_compare_exchange_N call which is suitable
> +   for conversion into ATOMIC_COMPARE_EXCHANGE if the second argument is
> +   &var where var is only addressable because of such calls.  */
> +
> +bool
> +optimize_atomic_compare_exchange_p (gimple *stmt)
> +{
> +  if (gimple_call_num_args (stmt) != 6
> +      || !flag_inline_atomics
> +      || !optimize
> +      || (flag_sanitize & (SANITIZE_THREAD | SANITIZE_ADDRESS)) != 0
> +      || !gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
> +      || !gimple_vdef (stmt)
> +      || !gimple_vuse (stmt))
> +    return false;
> +
> +  tree fndecl = gimple_call_fndecl (stmt);
> +  switch (DECL_FUNCTION_CODE (fndecl))
> +    {
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
> +    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
> +      break;
> +    default:
> +      return false;
> +    }
> +
> +  tree expected = gimple_call_arg (stmt, 1);
> +  if (TREE_CODE (expected) != ADDR_EXPR
> +      || !SSA_VAR_P (TREE_OPERAND (expected, 0))
> +      || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
> +      || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
> +      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
> +    return false;
> +
> +  tree weak = gimple_call_arg (stmt, 3);
> +  if (!integer_zerop (weak) && !integer_onep (weak))
> +    return false;
> +
> +  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
> +  machine_mode mode = TYPE_MODE (itype);
> +
> +  if (direct_optab_handler (atomic_compare_and_swap_optab, mode)
> +      == CODE_FOR_nothing
> +      && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
> +    return false;
> +
> +  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
> +      != GET_MODE_SIZE (mode))
> +    return false;
> +
> +  return true;
> +}
> +
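To illustrate the predicate (a sketch of mine; the function names are
invented):

    int v;

    _Bool
    yes (void)
    {
      int e = 0;        /* auto var whose address only feeds the call */
      return __atomic_compare_exchange_n (&v, &e, 1, 0, __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST);
    }

    static int ge;

    _Bool
    no (void)
    {
      /* ge is not an auto variable of this function: rejected.  */
      return __atomic_compare_exchange_n (&v, &ge, 1, 0, __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST);
    }

Likewise nothing is done at -O0, with -fno-inline-atomics, or under
-fsanitize=thread or -fsanitize=address.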
> +/* Fold
> +     r = __atomic_compare_exchange_N (p, &e, d, w, s, f);
> +   into
> +     _Complex uintN_t t = ATOMIC_COMPARE_EXCHANGE (p, e, d, w * 256 + N, s, f);
> +     i = IMAGPART_EXPR <t>;
> +     r = (_Bool) i;
> +     e = REALPART_EXPR <t>;  */
> +
> +void
> +fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
> +{
> +  gimple *stmt = gsi_stmt (*gsi);
> +  tree fndecl = gimple_call_fndecl (stmt);
> +  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
> +  tree ctype = build_complex_type (itype);
> +  tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
> +  gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> +                                   expected);
> +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  gimple_stmt_iterator gsiret = gsi_for_stmt (g);
> +  if (!useless_type_conversion_p (itype, TREE_TYPE (expected)))
> +    {
> +      g = gimple_build_assign (make_ssa_name (itype), VIEW_CONVERT_EXPR,
> +                               build1 (VIEW_CONVERT_EXPR, itype,
> +                                       gimple_assign_lhs (g)));
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +    }
> +  int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
> +             + int_size_in_bytes (itype);
> +  g = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6,
> +                                  gimple_call_arg (stmt, 0),
> +                                  gimple_assign_lhs (g),
> +                                  gimple_call_arg (stmt, 2),
> +                                  build_int_cst (integer_type_node, flag),
> +                                  gimple_call_arg (stmt, 4),
> +                                  gimple_call_arg (stmt, 5));
> +  tree lhs = make_ssa_name (ctype);
> +  gimple_call_set_lhs (g, lhs);
> +  gimple_set_vdef (g, gimple_vdef (stmt));
> +  gimple_set_vuse (g, gimple_vuse (stmt));
> +  SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
> +  if (gimple_call_lhs (stmt))
> +    {
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
> +                               build1 (IMAGPART_EXPR, itype, lhs));
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
> +                               gimple_assign_lhs (g));
> +    }
> +  gsi_replace (gsi, g, true);
> +  g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
> +                           build1 (REALPART_EXPR, itype, lhs));
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
> +    {
> +      g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> +                               VIEW_CONVERT_EXPR,
> +                               build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
> +                                       gimple_assign_lhs (g)));
> +      gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +    }
> +  g = gimple_build_assign (expected, SSA_NAME, gimple_assign_lhs (g));
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  *gsi = gsiret;
> +}
> +
>  /* Return true if ARG0 CODE ARG1 in infinite signed precision operation
>     doesn't fit into TYPE.  The test for overflow should be regardless of
>     -fwrapv, and even for unsigned types.  */
> --- gcc/tree-ssa.c.jj	2016-06-14 16:29:49.000000000 +0200
> +++ gcc/tree-ssa.c	2016-06-24 10:38:54.929029421 +0200
> @@ -1414,8 +1414,21 @@ execute_update_addresses_taken (void)
>             enum gimple_code code = gimple_code (stmt);
>             tree decl;
>  
> -           /* Note all addresses taken by the stmt.  */
> -           gimple_ior_addresses_taken (addresses_taken, stmt);
> +           if (code == GIMPLE_CALL
> +               && optimize_atomic_compare_exchange_p (stmt))
> +             {
> +               /* For __atomic_compare_exchange_N if the second argument
> +                  is &var, don't mark var addressable;
> +                  if it becomes non-addressable, we'll rewrite it into
> +                  ATOMIC_COMPARE_EXCHANGE call.  */
> +               tree arg = gimple_call_arg (stmt, 1);
> +               gimple_call_set_arg (stmt, 1, null_pointer_node);
> +               gimple_ior_addresses_taken (addresses_taken, stmt);
> +               gimple_call_set_arg (stmt, 1, arg);
> +             }
> +           else
> +             /* Note all addresses taken by the stmt.  */
> +             gimple_ior_addresses_taken (addresses_taken, stmt);
>  
>             /* If we have a call or an assignment, see if the lhs contains
>                a local decl that requires not to be a gimple register.  */
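And to show how the two tree-ssa.c hunks interact (a sketch; sink is a
hypothetical escaping call):

    int v;
    extern void sink (int *);

    void
    f (void)
    {
      int e = 0;
      sink (&e);        /* other escape: e stays addressable, so the call
                           below is left as __atomic_compare_exchange_4 */
      __atomic_compare_exchange_n (&v, &e, 1, 0, __ATOMIC_SEQ_CST,
                                   __ATOMIC_SEQ_CST);
    }

Drop the sink call and e becomes suitable for renaming, at which point the
second hunk below rewrites the call via fold_builtin_atomic_compare_exchange.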
> @@ -1657,6 +1670,16 @@ execute_update_addresses_taken (void)
>           else if (gimple_code (stmt) == GIMPLE_CALL)
>             {
>               unsigned i;
> +             if (optimize_atomic_compare_exchange_p (stmt))
> +               {
> +                 tree expected = gimple_call_arg (stmt, 1);
> +                 if (bitmap_bit_p (suitable_for_renaming,
> +                                   DECL_UID (TREE_OPERAND (expected, 0))))
> +                   {
> +                     fold_builtin_atomic_compare_exchange (&gsi);
> +                     continue;
> +                   }
> +               }
>               for (i = 0; i < gimple_call_num_args (stmt); ++i)
>                 {
>                   tree *argp = gimple_call_arg_ptr (stmt, i);
> 
> 
>         Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)