On Tue, 3 May 2016, Jakub Jelinek wrote:

> On Mon, May 02, 2016 at 11:01:02PM +0200, Uros Bizjak wrote:
> > Please don't use operands[N] without corresponding (match_dup N) in
> > the RTL pattern.  The "operands" array is only as long as the last
> > operand number from the pattern.  Just grep the pattern name from
> > generated insn-emit.c and you will see the problem.
> ...
>
> Ok, here is updated patch, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?
The middle-end parts are ok.

Thanks,
Richard.

> 2016-05-03  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR target/49244
> 	* tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
> 	(optimize_atomic_bit_test_and): New function.
> 	(pass_fold_builtins::execute): Use it.
> 	* optabs.def (atomic_bit_test_and_set_optab,
> 	atomic_bit_test_and_complement_optab,
> 	atomic_bit_test_and_reset_optab): New optabs.
> 	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
> 	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
> 	* builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
> 	* builtins.c (expand_ifn_atomic_bit_test_and): New function.
> 	* internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
> 	expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
> 	expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
> 	* doc/md.texi (atomic_bit_test_and_set@var{mode},
> 	atomic_bit_test_and_complement@var{mode},
> 	atomic_bit_test_and_reset@var{mode}): Document.
> 	* config/i386/sync.md (atomic_bit_test_and_set<mode>,
> 	atomic_bit_test_and_complement<mode>,
> 	atomic_bit_test_and_reset<mode>): New expanders.
> 	(atomic_bit_test_and_set<mode>_1,
> 	atomic_bit_test_and_complement<mode>_1,
> 	atomic_bit_test_and_reset<mode>_1): New insns.
>
> 	* gcc.target/i386/pr49244-1.c: New test.
> 	* gcc.target/i386/pr49244-2.c: New test.
>
> --- gcc/tree-ssa-ccp.c.jj	2016-05-01 12:21:05.063587549 +0200
> +++ gcc/tree-ssa-ccp.c	2016-05-02 13:01:36.367044729 +0200
> @@ -140,6 +140,8 @@ along with GCC; see the file COPYING3.
>  #include "builtins.h"
>  #include "tree-chkp.h"
>  #include "cfgloop.h"
> +#include "stor-layout.h"
> +#include "optabs-query.h"
>
>
>  /* Possible lattice values.  */
> @@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterat
>    return ret;
>  }
>
> +/* Optimize
> +     mask_2 = 1 << cnt_1;
> +     _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> +     _5 = _4 & mask_2;
> +   to
> +     _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> +     _5 = _4;
> +   If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
> +   is passed instead of 0, and the builtin just returns a zero
> +   or 1 value instead of the actual bit.
> +   Similarly for __sync_fetch_and_or_* (without the ", _3" part
> +   in there), and/or if mask_2 is a power of 2 constant.
> +   Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT
> +   in that case.  And similarly for and instead of or, except that
> +   the second argument to the builtin needs to be one's complement
> +   of the mask instead of mask.
*/ > + > +static void > +optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, > + enum internal_fn fn, bool has_model_arg, > + bool after) > +{ > + gimple *call = gsi_stmt (*gsip); > + tree lhs = gimple_call_lhs (call); > + use_operand_p use_p; > + gimple *use_stmt; > + tree mask, bit; > + optab optab; > + > + if (!flag_inline_atomics > + || optimize_debug > + || !gimple_call_builtin_p (call, BUILT_IN_NORMAL) > + || !lhs > + || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs) > + || !single_imm_use (lhs, &use_p, &use_stmt) > + || !is_gimple_assign (use_stmt) > + || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR > + || !gimple_vdef (call)) > + return; > + > + switch (fn) > + { > + case IFN_ATOMIC_BIT_TEST_AND_SET: > + optab = atomic_bit_test_and_set_optab; > + break; > + case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT: > + optab = atomic_bit_test_and_complement_optab; > + break; > + case IFN_ATOMIC_BIT_TEST_AND_RESET: > + optab = atomic_bit_test_and_reset_optab; > + break; > + default: > + return; > + } > + > + if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing) > + return; > + > + mask = gimple_call_arg (call, 1); > + tree use_lhs = gimple_assign_lhs (use_stmt); > + if (!use_lhs) > + return; > + > + if (TREE_CODE (mask) == INTEGER_CST) > + { > + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) > + mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask); > + mask = fold_convert (TREE_TYPE (lhs), mask); > + int ibit = tree_log2 (mask); > + if (ibit < 0) > + return; > + bit = build_int_cst (TREE_TYPE (lhs), ibit); > + } > + else if (TREE_CODE (mask) == SSA_NAME) > + { > + gimple *g = SSA_NAME_DEF_STMT (mask); > + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) > + { > + if (!is_gimple_assign (g) > + || gimple_assign_rhs_code (g) != BIT_NOT_EXPR) > + return; > + mask = gimple_assign_rhs1 (g); > + if (TREE_CODE (mask) != SSA_NAME) > + return; > + g = SSA_NAME_DEF_STMT (mask); > + } > + if (!is_gimple_assign (g) > + || gimple_assign_rhs_code (g) != LSHIFT_EXPR > + || !integer_onep (gimple_assign_rhs1 (g))) > + return; > + bit = gimple_assign_rhs2 (g); > + } > + else > + return; > + > + if (gimple_assign_rhs1 (use_stmt) == lhs) > + { > + if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0)) > + return; > + } > + else if (gimple_assign_rhs2 (use_stmt) != lhs > + || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0)) > + return; > + > + bool use_bool = true; > + bool has_debug_uses = false; > + imm_use_iterator iter; > + gimple *g; > + > + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs)) > + use_bool = false; > + FOR_EACH_IMM_USE_STMT (g, iter, use_lhs) > + { > + enum tree_code code = ERROR_MARK; > + tree op0, op1; > + if (is_gimple_debug (g)) > + { > + has_debug_uses = true; > + continue; > + } > + else if (is_gimple_assign (g)) > + switch (gimple_assign_rhs_code (g)) > + { > + case COND_EXPR: > + op1 = gimple_assign_rhs1 (g); > + code = TREE_CODE (op1); > + op0 = TREE_OPERAND (op1, 0); > + op1 = TREE_OPERAND (op1, 1); > + break; > + case EQ_EXPR: > + case NE_EXPR: > + code = gimple_assign_rhs_code (g); > + op0 = gimple_assign_rhs1 (g); > + op1 = gimple_assign_rhs2 (g); > + break; > + default: > + break; > + } > + else if (gimple_code (g) == GIMPLE_COND) > + { > + code = gimple_cond_code (g); > + op0 = gimple_cond_lhs (g); > + op1 = gimple_cond_rhs (g); > + } > + > + if ((code == EQ_EXPR || code == NE_EXPR) > + && op0 == use_lhs > + && integer_zerop (op1)) > + { > + use_operand_p use_p; > + int n = 0; > + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) > + n++; > + if (n == 1) > + continue; > 
+ } > + > + use_bool = false; > + BREAK_FROM_IMM_USE_STMT (iter); > + } > + > + tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); > + tree flag = build_int_cst (TREE_TYPE (lhs), use_bool); > + if (has_model_arg) > + g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0), > + bit, flag, gimple_call_arg (call, 2)); > + else > + g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0), > + bit, flag); > + gimple_call_set_lhs (g, new_lhs); > + gimple_set_location (g, gimple_location (call)); > + gimple_set_vuse (g, gimple_vuse (call)); > + gimple_set_vdef (g, gimple_vdef (call)); > + SSA_NAME_DEF_STMT (gimple_vdef (call)) = g; > + gimple_stmt_iterator gsi = *gsip; > + gsi_insert_after (&gsi, g, GSI_NEW_STMT); > + if (after) > + { > + /* The internal function returns the value of the specified bit > + before the atomic operation. If we are interested in the value > + of the specified bit after the atomic operation (makes only sense > + for xor, otherwise the bit content is compile time known), > + we need to invert the bit. */ > + g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), > + BIT_XOR_EXPR, new_lhs, > + use_bool ? build_int_cst (TREE_TYPE (lhs), 1) > + : mask); > + new_lhs = gimple_assign_lhs (g); > + gsi_insert_after (&gsi, g, GSI_NEW_STMT); > + } > + if (use_bool && has_debug_uses) > + { > + tree temp = make_node (DEBUG_EXPR_DECL); > + DECL_ARTIFICIAL (temp) = 1; > + TREE_TYPE (temp) = TREE_TYPE (lhs); > + DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs)); > + tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit); > + g = gimple_build_debug_bind (temp, t, g); > + gsi_insert_after (&gsi, g, GSI_NEW_STMT); > + FOR_EACH_IMM_USE_STMT (g, iter, use_lhs) > + if (is_gimple_debug (g)) > + { > + use_operand_p use_p; > + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) > + SET_USE (use_p, temp); > + update_stmt (g); > + } > + } > + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs) > + = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs); > + replace_uses_by (use_lhs, new_lhs); > + gsi = gsi_for_stmt (use_stmt); > + gsi_remove (&gsi, true); > + release_defs (use_stmt); > + gsi_remove (gsip, true); > + release_ssa_name (lhs); > +} > + > /* A simple pass that attempts to fold all builtin functions. This pass > is run after we've propagated as many constants as we can. 
*/ > > @@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *f > cfg_changed = true; > break; > > + case BUILT_IN_ATOMIC_FETCH_OR_1: > + case BUILT_IN_ATOMIC_FETCH_OR_2: > + case BUILT_IN_ATOMIC_FETCH_OR_4: > + case BUILT_IN_ATOMIC_FETCH_OR_8: > + case BUILT_IN_ATOMIC_FETCH_OR_16: > + optimize_atomic_bit_test_and (&i, > + IFN_ATOMIC_BIT_TEST_AND_SET, > + true, false); > + break; > + case BUILT_IN_SYNC_FETCH_AND_OR_1: > + case BUILT_IN_SYNC_FETCH_AND_OR_2: > + case BUILT_IN_SYNC_FETCH_AND_OR_4: > + case BUILT_IN_SYNC_FETCH_AND_OR_8: > + case BUILT_IN_SYNC_FETCH_AND_OR_16: > + optimize_atomic_bit_test_and (&i, > + IFN_ATOMIC_BIT_TEST_AND_SET, > + false, false); > + break; > + > + case BUILT_IN_ATOMIC_FETCH_XOR_1: > + case BUILT_IN_ATOMIC_FETCH_XOR_2: > + case BUILT_IN_ATOMIC_FETCH_XOR_4: > + case BUILT_IN_ATOMIC_FETCH_XOR_8: > + case BUILT_IN_ATOMIC_FETCH_XOR_16: > + optimize_atomic_bit_test_and > + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false); > + break; > + case BUILT_IN_SYNC_FETCH_AND_XOR_1: > + case BUILT_IN_SYNC_FETCH_AND_XOR_2: > + case BUILT_IN_SYNC_FETCH_AND_XOR_4: > + case BUILT_IN_SYNC_FETCH_AND_XOR_8: > + case BUILT_IN_SYNC_FETCH_AND_XOR_16: > + optimize_atomic_bit_test_and > + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false); > + break; > + > + case BUILT_IN_ATOMIC_XOR_FETCH_1: > + case BUILT_IN_ATOMIC_XOR_FETCH_2: > + case BUILT_IN_ATOMIC_XOR_FETCH_4: > + case BUILT_IN_ATOMIC_XOR_FETCH_8: > + case BUILT_IN_ATOMIC_XOR_FETCH_16: > + optimize_atomic_bit_test_and > + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true); > + break; > + case BUILT_IN_SYNC_XOR_AND_FETCH_1: > + case BUILT_IN_SYNC_XOR_AND_FETCH_2: > + case BUILT_IN_SYNC_XOR_AND_FETCH_4: > + case BUILT_IN_SYNC_XOR_AND_FETCH_8: > + case BUILT_IN_SYNC_XOR_AND_FETCH_16: > + optimize_atomic_bit_test_and > + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true); > + break; > + > + case BUILT_IN_ATOMIC_FETCH_AND_1: > + case BUILT_IN_ATOMIC_FETCH_AND_2: > + case BUILT_IN_ATOMIC_FETCH_AND_4: > + case BUILT_IN_ATOMIC_FETCH_AND_8: > + case BUILT_IN_ATOMIC_FETCH_AND_16: > + optimize_atomic_bit_test_and (&i, > + IFN_ATOMIC_BIT_TEST_AND_RESET, > + true, false); > + break; > + case BUILT_IN_SYNC_FETCH_AND_AND_1: > + case BUILT_IN_SYNC_FETCH_AND_AND_2: > + case BUILT_IN_SYNC_FETCH_AND_AND_4: > + case BUILT_IN_SYNC_FETCH_AND_AND_8: > + case BUILT_IN_SYNC_FETCH_AND_AND_16: > + optimize_atomic_bit_test_and (&i, > + IFN_ATOMIC_BIT_TEST_AND_RESET, > + false, false); > + break; > + > case BUILT_IN_VA_START: > case BUILT_IN_VA_END: > case BUILT_IN_VA_COPY: > --- gcc/optabs.def.jj 2016-05-01 12:21:04.600593737 +0200 > +++ gcc/optabs.def 2016-05-02 09:22:47.814226751 +0200 > @@ -337,6 +337,9 @@ OPTAB_D (atomic_add_fetch_optab, "atomic > OPTAB_D (atomic_add_optab, "atomic_add$I$a") > OPTAB_D (atomic_and_fetch_optab, "atomic_and_fetch$I$a") > OPTAB_D (atomic_and_optab, "atomic_and$I$a") > +OPTAB_D (atomic_bit_test_and_set_optab, "atomic_bit_test_and_set$I$a") > +OPTAB_D (atomic_bit_test_and_complement_optab, > "atomic_bit_test_and_complement$I$a") > +OPTAB_D (atomic_bit_test_and_reset_optab, "atomic_bit_test_and_reset$I$a") > OPTAB_D (atomic_compare_and_swap_optab, "atomic_compare_and_swap$I$a") > OPTAB_D (atomic_exchange_optab, "atomic_exchange$I$a") > OPTAB_D (atomic_fetch_add_optab, "atomic_fetch_add$I$a") > --- gcc/internal-fn.def.jj 2016-05-01 12:21:04.574594084 +0200 > +++ gcc/internal-fn.def 2016-05-02 09:22:47.815226737 +0200 > @@ -189,6 +189,11 @@ DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NO > current target. 
*/ > DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL) > > +/* Atomic functions. */ > +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL) > +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, > NULL) > +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL) > + > #undef DEF_INTERNAL_INT_FN > #undef DEF_INTERNAL_FLT_FN > #undef DEF_INTERNAL_OPTAB_FN > --- gcc/builtins.h.jj 2016-05-01 12:21:04.915589527 +0200 > +++ gcc/builtins.h 2016-05-02 09:22:47.816226723 +0200 > @@ -71,6 +71,7 @@ extern tree std_fn_abi_va_list (tree); > extern tree std_canonical_va_list_type (tree); > extern void std_expand_builtin_va_start (tree, rtx); > extern void expand_builtin_trap (void); > +extern void expand_ifn_atomic_bit_test_and (gcall *); > extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int); > extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int); > extern enum built_in_function builtin_mathfn_code (const_tree); > --- gcc/builtins.c.jj 2016-05-01 12:21:04.856590316 +0200 > +++ gcc/builtins.c 2016-05-02 09:22:47.818226695 +0200 > @@ -5310,6 +5310,90 @@ expand_builtin_atomic_fetch_op (machine_ > return ret; > } > > +/* Expand IFN_ATOMIC_BIT_TEST_AND_* internal function. */ > + > +void > +expand_ifn_atomic_bit_test_and (gcall *call) > +{ > + tree ptr = gimple_call_arg (call, 0); > + tree bit = gimple_call_arg (call, 1); > + tree flag = gimple_call_arg (call, 2); > + tree lhs = gimple_call_lhs (call); > + enum memmodel model = MEMMODEL_SYNC_SEQ_CST; > + machine_mode mode = TYPE_MODE (TREE_TYPE (flag)); > + enum rtx_code code; > + optab optab; > + struct expand_operand ops[5]; > + > + gcc_assert (flag_inline_atomics); > + > + if (gimple_call_num_args (call) == 4) > + model = get_memmodel (gimple_call_arg (call, 3)); > + > + rtx mem = get_builtin_sync_mem (ptr, mode); > + rtx val = expand_expr_force_mode (bit, mode); > + > + switch (gimple_call_internal_fn (call)) > + { > + case IFN_ATOMIC_BIT_TEST_AND_SET: > + code = IOR; > + optab = atomic_bit_test_and_set_optab; > + break; > + case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT: > + code = XOR; > + optab = atomic_bit_test_and_complement_optab; > + break; > + case IFN_ATOMIC_BIT_TEST_AND_RESET: > + code = AND; > + optab = atomic_bit_test_and_reset_optab; > + break; > + default: > + gcc_unreachable (); > + } > + > + if (lhs == NULL_TREE) > + { > + val = expand_simple_binop (mode, ASHIFT, const1_rtx, > + val, NULL_RTX, true, OPTAB_DIRECT); > + if (code == AND) > + val = expand_simple_unop (mode, NOT, val, NULL_RTX, true); > + expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false); > + return; > + } > + > + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + enum insn_code icode = direct_optab_handler (optab, mode); > + gcc_assert (icode != CODE_FOR_nothing); > + create_output_operand (&ops[0], target, mode); > + create_fixed_operand (&ops[1], mem); > + create_convert_operand_to (&ops[2], val, mode, true); > + create_integer_operand (&ops[3], model); > + create_integer_operand (&ops[4], integer_onep (flag)); > + if (maybe_expand_insn (icode, 5, ops)) > + return; > + > + rtx bitval = val; > + val = expand_simple_binop (mode, ASHIFT, const1_rtx, > + val, NULL_RTX, true, OPTAB_DIRECT); > + rtx maskval = val; > + if (code == AND) > + val = expand_simple_unop (mode, NOT, val, NULL_RTX, true); > + rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val, > + code, model, false); > + if (integer_onep (flag)) > + { > + result = expand_simple_binop 
(mode, ASHIFTRT, result, bitval, > + NULL_RTX, true, OPTAB_DIRECT); > + result = expand_simple_binop (mode, AND, result, const1_rtx, target, > + true, OPTAB_DIRECT); > + } > + else > + result = expand_simple_binop (mode, AND, result, maskval, target, true, > + OPTAB_DIRECT); > + if (result != target) > + emit_move_insn (target, result); > +} > + > /* Expand an atomic clear operation. > void _atomic_clear (BOOL *obj, enum memmodel) > EXP is the call expression. */ > --- gcc/internal-fn.c.jj 2016-05-01 12:21:04.952589033 +0200 > +++ gcc/internal-fn.c 2016-05-02 09:22:47.815226737 +0200 > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. > #include "expr.h" > #include "ubsan.h" > #include "recog.h" > +#include "builtins.h" > > /* The names of each internal function, indexed by function number. */ > const char *const internal_fn_name_array[] = { > @@ -2118,6 +2119,30 @@ expand_SET_EDOM (internal_fn, gcall *) > #endif > } > > +/* Expand atomic bit test and set. */ > + > +static void > +expand_ATOMIC_BIT_TEST_AND_SET (internal_fn, gcall *call) > +{ > + expand_ifn_atomic_bit_test_and (call); > +} > + > +/* Expand atomic bit test and complement. */ > + > +static void > +expand_ATOMIC_BIT_TEST_AND_COMPLEMENT (internal_fn, gcall *call) > +{ > + expand_ifn_atomic_bit_test_and (call); > +} > + > +/* Expand atomic bit test and reset. */ > + > +static void > +expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call) > +{ > + expand_ifn_atomic_bit_test_and (call); > +} > + > /* Expand a call to FN using the operands in STMT. FN has a single > output operand and NARGS input operands. */ > > --- gcc/doc/md.texi.jj 2016-02-22 22:26:40.000000000 +0100 > +++ gcc/doc/md.texi 2016-05-02 09:37:55.018690799 +0200 > @@ -6909,6 +6909,33 @@ The specific value that defines "set" is > is normally based on what is performed by the native atomic test and set > instruction. > > +@cindex @code{atomic_bit_test_and_set@var{mode}} instruction pattern > +@cindex @code{atomic_bit_test_and_complement@var{mode}} instruction pattern > +@cindex @code{atomic_bit_test_and_reset@var{mode}} instruction pattern > +@item @samp{atomic_bit_test_and_set@var{mode}} > +@itemx @samp{atomic_bit_test_and_complement@var{mode}} > +@itemx @samp{atomic_bit_test_and_reset@var{mode}} > +These patterns emit code for an atomic bitwise operation on memory with > memory > +model semantics, and return the original value of the specified bit. > +Operand 0 is an output operand which contains the value of the specified bit > +from the memory location before the operation was performed. Operand 1 is > the > +memory on which the atomic operation is performed. Operand 2 is the bit > within > +the operand, starting with least significant bit. Operand 3 is the memory > model > +to be used by the operation. Operand 4 is a flag - it is @code{const1_rtx} > +if operand 0 should contain the original value of the specified bit in the > +least significant bit of the operand, and @code{const0_rtx} if the bit should > +be in its original position in the operand. > +@code{atomic_bit_test_and_set@var{mode}} atomically sets the specified bit > after > +remembering its original value, > @code{atomic_bit_test_and_complement@var{mode}} > +inverts the specified bit and @code{atomic_bit_test_and_reset@var{mode}} > clears > +the specified bit. 
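
[Illustration only, not part of the patch.  A minimal user-level C example of
what these patterns enable; the function name below is made up for this note,
but the idiom mirrors the pr49244-1.c tests further down.  Assuming the
tree-ssa-ccp transformation fires, the fetch-or-and-test sequence should go
through atomic_bit_test_and_set<mode> and become a single lock bts, with the
old bit value recovered from the carry flag, rather than the compare-and-swap
loop previously emitted whenever the old value was needed.

  /* Atomically set bit BIT in *WORD and return its previous value (0 or 1).  */
  int
  set_bit_and_test (unsigned int *word, int bit)
  {
    unsigned int mask = 1u << bit;
    return (__atomic_fetch_or (word, mask, __ATOMIC_SEQ_CST) & mask) != 0;
  }
]
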
> + > +If these patterns are not defined, attempts will be made to use > +@code{atomic_fetch_or@var{mode}}, @code{atomic_fetch_xor@var{mode}} or > +@code{atomic_fetch_and@var{mode}} instruction patterns, or their @code{sync} > +counterparts. If none of these are available a compare-and-swap > +loop will be used. > + > @cindex @code{mem_thread_fence@var{mode}} instruction pattern > @item @samp{mem_thread_fence@var{mode}} > This pattern emits code required to implement a thread fence with > --- gcc/config/i386/sync.md.jj 2016-05-01 12:21:05.013588217 +0200 > +++ gcc/config/i386/sync.md 2016-05-02 09:22:47.819226682 +0200 > @@ -605,3 +605,114 @@ (define_insn "atomic_<logic><mode>" > (clobber (reg:CC FLAGS_REG))] > "" > "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}") > + > +(define_expand "atomic_bit_test_and_set<mode>" > + [(match_operand:SWI248 0 "register_operand") > + (match_operand:SWI248 1 "memory_operand") > + (match_operand:SWI248 2 "nonmemory_operand") > + (match_operand:SI 3 "const_int_operand") ;; model > + (match_operand:SI 4 "const_int_operand")] > + "" > +{ > + emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2], > + operands[3])); > + rtx tem = gen_reg_rtx (QImode); > + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); > + rtx result = convert_modes (<MODE>mode, QImode, tem, 1); > + if (operands[4] == const0_rtx) > + result = expand_simple_binop (<MODE>mode, ASHIFT, result, > + operands[2], operands[0], 0, OPTAB_DIRECT); > + if (result != operands[0]) > + emit_move_insn (operands[0], result); > + DONE; > +}) > + > +(define_insn "atomic_bit_test_and_set<mode>_1" > + [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (unspec_volatile:SWI248 > + [(match_operand:SWI248 0 "memory_operand" "+m") > + (match_operand:SI 2 "const_int_operand")] ;; model > + UNSPECV_XCHG) > + (const_int 0))) > + (set (zero_extract:SWI248 (match_dup 0) > + (const_int 1) > + (match_operand:SWI248 1 "nonmemory_operand" "rN")) > + (const_int 1))] > + "" > + "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}") > + > +(define_expand "atomic_bit_test_and_complement<mode>" > + [(match_operand:SWI248 0 "register_operand") > + (match_operand:SWI248 1 "memory_operand") > + (match_operand:SWI248 2 "nonmemory_operand") > + (match_operand:SI 3 "const_int_operand") ;; model > + (match_operand:SI 4 "const_int_operand")] > + "" > +{ > + emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1], > + operands[2], > + operands[3])); > + rtx tem = gen_reg_rtx (QImode); > + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); > + rtx result = convert_modes (<MODE>mode, QImode, tem, 1); > + if (operands[4] == const0_rtx) > + result = expand_simple_binop (<MODE>mode, ASHIFT, result, > + operands[2], operands[0], 0, OPTAB_DIRECT); > + if (result != operands[0]) > + emit_move_insn (operands[0], result); > + DONE; > +}) > + > +(define_insn "atomic_bit_test_and_complement<mode>_1" > + [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (unspec_volatile:SWI248 > + [(match_operand:SWI248 0 "memory_operand" "+m") > + (match_operand:SI 2 "const_int_operand")] ;; model > + UNSPECV_XCHG) > + (const_int 0))) > + (set (zero_extract:SWI248 (match_dup 0) > + (const_int 1) > + (match_operand:SWI248 1 "nonmemory_operand" "rN")) > + (not:SWI248 (zero_extract:SWI248 (match_dup 0) > + (const_int 1) > + (match_dup 1))))] > + "" > + "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}") > + > +(define_expand "atomic_bit_test_and_reset<mode>" > + [(match_operand:SWI248 0 
"register_operand") > + (match_operand:SWI248 1 "memory_operand") > + (match_operand:SWI248 2 "nonmemory_operand") > + (match_operand:SI 3 "const_int_operand") ;; model > + (match_operand:SI 4 "const_int_operand")] > + "" > +{ > + emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2], > + operands[3])); > + rtx tem = gen_reg_rtx (QImode); > + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); > + rtx result = convert_modes (<MODE>mode, QImode, tem, 1); > + if (operands[4] == const0_rtx) > + result = expand_simple_binop (<MODE>mode, ASHIFT, result, > + operands[2], operands[0], 0, OPTAB_DIRECT); > + if (result != operands[0]) > + emit_move_insn (operands[0], result); > + DONE; > +}) > + > +(define_insn "atomic_bit_test_and_reset<mode>_1" > + [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (unspec_volatile:SWI248 > + [(match_operand:SWI248 0 "memory_operand" "+m") > + (match_operand:SI 2 "const_int_operand")] ;; model > + UNSPECV_XCHG) > + (const_int 0))) > + (set (zero_extract:SWI248 (match_dup 0) > + (const_int 1) > + (match_operand:SWI248 1 "nonmemory_operand" "rN")) > + (const_int 0))] > + "" > + "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}") > --- gcc/testsuite/gcc.target/i386/pr49244-1.c.jj 2016-05-02 > 14:52:56.776814774 +0200 > +++ gcc/testsuite/gcc.target/i386/pr49244-1.c 2016-05-02 12:39:52.126750700 > +0200 > @@ -0,0 +1,188 @@ > +/* PR target/49244 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +void bar (void); > + > +__attribute__((noinline, noclone)) int > +f1 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__sync_fetch_and_or (a, mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f2 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + unsigned int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED); > + unsigned int t2 = t1 & mask; > + return t2 != 0; > +} > + > +__attribute__((noinline, noclone)) long int > +f3 (long int *a, int bit) > +{ > + unsigned long int mask = (1ul << bit); > + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; > +} > + > +__attribute__((noinline, noclone)) int > +f4 (int *a) > +{ > + unsigned int mask = (1u << 7); > + return (__sync_fetch_and_or (a, mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f5 (int *a) > +{ > + unsigned int mask = (1u << 13); > + return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f6 (int *a) > +{ > + unsigned int mask = (1u << 0); > + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) void > +f7 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + if ((__sync_fetch_and_xor (a, mask) & mask) != 0) > + bar (); > +} > + > +__attribute__((noinline, noclone)) void > +f8 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0) > + bar (); > +} > + > +__attribute__((noinline, noclone)) int > +f9 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f10 (int *a) > +{ > + unsigned int mask = (1u << 7); > + return (__sync_fetch_and_xor (a, mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f11 (int *a) > +{ > + unsigned int mask = (1u << 13); > + return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & 
mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f12 (int *a) > +{ > + unsigned int mask = (1u << 0); > + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f13 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f14 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f15 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f16 (int *a) > +{ > + unsigned int mask = (1u << 7); > + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f17 (int *a) > +{ > + unsigned int mask = (1u << 13); > + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) int > +f18 (int *a) > +{ > + unsigned int mask = (1u << 0); > + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) unsigned long int > +f19 (unsigned long int *a, int bit) > +{ > + unsigned long int mask = (1ul << bit); > + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) unsigned long int > +f20 (unsigned long int *a) > +{ > + unsigned long int mask = (1ul << 7); > + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; > +} > + > +__attribute__((noinline, noclone)) int > +f21 (int *a, int bit) > +{ > + unsigned int mask = (1u << bit); > + return (__sync_fetch_and_or (a, mask) & mask); > +} > + > +__attribute__((noinline, noclone)) unsigned long int > +f22 (unsigned long int *a) > +{ > + unsigned long int mask = (1ul << 7); > + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask); > +} > + > +__attribute__((noinline, noclone)) unsigned long int > +f23 (unsigned long int *a) > +{ > + unsigned long int mask = (1ul << 7); > + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask); > +} > + > +__attribute__((noinline, noclone)) unsigned short int > +f24 (unsigned short int *a) > +{ > + unsigned short int mask = (1u << 7); > + return (__sync_fetch_and_or (a, mask) & mask) != 0; > +} > + > +__attribute__((noinline, noclone)) unsigned short int > +f25 (unsigned short int *a) > +{ > + unsigned short int mask = (1u << 7); > + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; > +} > + > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */ > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */ > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */ > --- gcc/testsuite/gcc.target/i386/pr49244-2.c.jj 2016-05-02 > 12:51:51.501983254 +0200 > +++ gcc/testsuite/gcc.target/i386/pr49244-2.c 2016-05-02 14:47:30.240202019 > +0200 > @@ -0,0 +1,108 @@ > +/* PR target/49244 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -g" } */ > + > +int cnt; > + > +__attribute__((noinline, noclone)) void > +bar (void) > +{ > + cnt++; > +} > + > +#include "pr49244-1.c" > + > +int a; > +long int b; > +unsigned long int c; > +unsigned short int d; > + > +int > +main () > +{ > + __atomic_store_n (&a, 15, __ATOMIC_RELAXED); > + if (f1 (&a, 2) != 1 || __atomic_load_n (&a, 
__ATOMIC_RELAXED) != 15 > + || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31) > + __builtin_abort (); > + if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31 > + || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63) > + __builtin_abort (); > + __atomic_store_n (&b, 24, __ATOMIC_RELAXED); > + if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28 > + || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28) > + __builtin_abort (); > + __atomic_store_n (&a, 0, __ATOMIC_RELAXED); > + if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128 > + || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128) > + __builtin_abort (); > + if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 > + || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320) > + __builtin_abort (); > + if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321 > + || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) > + __builtin_abort (); > + if (cnt != 0 > + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != > 8193 > + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != > 8321) > + __builtin_abort (); > + if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != > 8193 > + || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != > 8321) > + __builtin_abort (); > + if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 > + || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) > + __builtin_abort (); > + if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 > + || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) > + __builtin_abort (); > + if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 > + || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) > + __builtin_abort (); > + if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 > + || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) > + __builtin_abort (); > + if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 > + || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) > + __builtin_abort (); > + if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 > + || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) > + __builtin_abort (); > + if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 > + || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) > + __builtin_abort (); > + __atomic_store_n (&a, 8321, __ATOMIC_RELAXED); > + if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 > + || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) > + __builtin_abort (); > + if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 > + || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) > + __builtin_abort (); > + if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 > + || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) > + __builtin_abort (); > + if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 > + || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) > + __builtin_abort (); > + if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 > + || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) > + __builtin_abort (); > + __atomic_store_n (&a, 128, 
__ATOMIC_RELAXED);
> +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> +    __builtin_abort ();
> +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || cnt != 2)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)