Hi!

The following patch implements the lowering of the __atomic_*fetch* built-in
functions where the first argument is a pointer to an (optionally _Atomic)
_BitInt which either doesn't have a size of 1, 2, 4, 8 or 16 bytes, or has a
16-byte size but the target doesn't support TImode.  The patch applies on top
of the _BitInt patch series.
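The transformation is conceptually an initial atomic load followed by a
compare-and-swap retry loop.  The C sketch below only illustrates that shape
for a SEQ_CST __atomic_fetch_add on a 575-bit operand; the function name is
made up for the example, and the compiler builds the equivalent trees directly
rather than emitting source like this:

/* Illustration only: roughly what __atomic_fetch_add (addr, val, SEQ_CST)
   turns into when the _BitInt size has no native atomic support.  */
_BitInt(575)
bitint_fetch_add_sketch (_BitInt(575) *addr, _BitInt(575) val)
{
  _BitInt(575) old, newval;
  /* One initial relaxed load; any race is fixed up by the CAS loop.  */
  __atomic_load (addr, &old, __ATOMIC_RELAXED);
  do
    {
      /* The patch performs +/- in the unsigned variant of the type to
	 avoid signed overflow; plain + is enough for the sketch.  */
      newval = old + val;
    }
  /* On failure __atomic_compare_exchange stores the current value back
     into old, so the loop just recomputes newval and retries.  */
  while (!__atomic_compare_exchange (addr, &old, &newval, 0,
				     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  return old;	/* __atomic_fetch_add returns the old value; the
		   *_op_fetch forms would return newval instead.  */
}

The fetch-op vs. op-fetch distinction only changes whether the old or the new
value is returned, and for a RELEASE or ACQ_REL success order the failure
order of the compare-exchange is downgraded to RELAXED, as done in the patch
below.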
Tested on x86_64-linux.

2023-08-03  Jakub Jelinek  <ja...@redhat.com>

	PR c/102989
gcc/c-family/
	* c-common.cc (sync_resolve_size): Add ORIG_FORMAT argument.  If
	FETCH && !ORIG_FORMAT, type is BITINT_TYPE, return -1 if size isn't
	one of 1, 2, 4, 8 or 16 or if it is 16 but TImode is not supported.
	(atomic_bitint_fetch_using_cas_loop): New function.
	(resolve_overloaded_builtin): Adjust sync_resolve_size caller.  If
	-1 is returned, use atomic_bitint_fetch_using_cas_loop to lower it.
	Formatting fix.
gcc/testsuite/
	* gcc.dg/bitint-18.c: New test.

--- gcc/c-family/c-common.cc.jj	2023-07-11 15:28:55.119673958 +0200
+++ gcc/c-family/c-common.cc	2023-08-03 12:10:50.852085519 +0200
@@ -7190,12 +7190,16 @@ speculation_safe_value_resolve_return (t
 /* A helper function for resolve_overloaded_builtin in resolving the
    overloaded __sync_ builtins.  Returns a positive power of 2 if the
    first operand of PARAMS is a pointer to a supported data type.
-   Returns 0 if an error is encountered.
+   Returns 0 if an error is encountered.  Return -1 for _BitInt
+   __atomic*fetch* with unsupported type which should be handled by
+   a cas loop.
    FETCH is true when FUNCTION is one of the _FETCH_OP_ or _OP_FETCH_
-   built-ins.  */
+   built-ins.  ORIG_FORMAT is for __sync_* rather than __atomic_*
+   built-ins.  */
 
 static int
-sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch)
+sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
+		   bool orig_format)
 {
   /* Type of the argument.  */
   tree argtype;
@@ -7230,9 +7234,19 @@ sync_resolve_size (tree function, vec<tr
       goto incompatible;
 
   size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+  if (size == 16
+      && fetch
+      && !orig_format
+      && TREE_CODE (type) == BITINT_TYPE
+      && !targetm.scalar_mode_supported_p (TImode))
+    return -1;
+
   if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
     return size;
 
+  if (fetch && !orig_format && TREE_CODE (type) == BITINT_TYPE)
+    return -1;
+
 incompatible:
   /* Issue the diagnostic only if the argument is valid, otherwise
      it would be redundant at best and could be misleading.  */
@@ -7849,6 +7863,223 @@ resolve_overloaded_atomic_store (locatio
 }
 
 
+/* Emit __atomic*fetch* on _BitInt which doesn't have a size of
+   1, 2, 4, 8 or 16 bytes using __atomic_compare_exchange loop.
+   ORIG_CODE is the DECL_FUNCTION_CODE of ORIG_FUNCTION and
+   ORIG_PARAMS arguments of the call.  */
+
+static tree
+atomic_bitint_fetch_using_cas_loop (location_t loc,
+				    enum built_in_function orig_code,
+				    tree orig_function,
+				    vec<tree, va_gc> *orig_params)
+{
+  enum tree_code code = ERROR_MARK;
+  bool return_old_p = false;
+  switch (orig_code)
+    {
+    case BUILT_IN_ATOMIC_ADD_FETCH_N:
+      code = PLUS_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_SUB_FETCH_N:
+      code = MINUS_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_AND_FETCH_N:
+      code = BIT_AND_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_NAND_FETCH_N:
+      break;
+    case BUILT_IN_ATOMIC_XOR_FETCH_N:
+      code = BIT_XOR_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_OR_FETCH_N:
+      code = BIT_IOR_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_ADD_N:
+      code = PLUS_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_SUB_N:
+      code = MINUS_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_AND_N:
+      code = BIT_AND_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_NAND_N:
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_XOR_N:
+      code = BIT_XOR_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_OR_N:
+      code = BIT_IOR_EXPR;
+      return_old_p = true;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (orig_params->length () != 3)
+    {
+      if (orig_params->length () < 3)
+	error_at (loc, "too few arguments to function %qE", orig_function);
+      else
+	error_at (loc, "too many arguments to function %qE", orig_function);
+      return error_mark_node;
+    }
+
+  tree stmts = push_stmt_list ();
+
+  tree nonatomic_lhs_type = TREE_TYPE (TREE_TYPE ((*orig_params)[0]));
+  nonatomic_lhs_type = TYPE_MAIN_VARIANT (nonatomic_lhs_type);
+  gcc_assert (TREE_CODE (nonatomic_lhs_type) == BITINT_TYPE);
+
+  tree lhs_addr = (*orig_params)[0];
+  tree val = convert (nonatomic_lhs_type, (*orig_params)[1]);
+  tree model = convert (integer_type_node, (*orig_params)[2]);
+  if (TREE_SIDE_EFFECTS (lhs_addr))
+    {
+      tree var = create_tmp_var_raw (TREE_TYPE (lhs_addr));
+      lhs_addr = build4 (TARGET_EXPR, TREE_TYPE (lhs_addr), var, lhs_addr,
+			 NULL_TREE, NULL_TREE);
+      add_stmt (lhs_addr);
+    }
+  if (TREE_SIDE_EFFECTS (val))
+    {
+      tree var = create_tmp_var_raw (nonatomic_lhs_type);
+      val = build4 (TARGET_EXPR, nonatomic_lhs_type, var, val, NULL_TREE,
+		    NULL_TREE);
+      add_stmt (val);
+    }
+  if (TREE_SIDE_EFFECTS (model))
+    {
+      tree var = create_tmp_var_raw (integer_type_node);
+      model = build4 (TARGET_EXPR, integer_type_node, var, model, NULL_TREE,
+		      NULL_TREE);
+      add_stmt (model);
+    }
+
+  tree old = create_tmp_var_raw (nonatomic_lhs_type);
+  tree old_addr = build_unary_op (loc, ADDR_EXPR, old, false);
+  TREE_ADDRESSABLE (old) = 1;
+  suppress_warning (old);
+
+  tree newval = create_tmp_var_raw (nonatomic_lhs_type);
+  tree newval_addr = build_unary_op (loc, ADDR_EXPR, newval, false);
+  TREE_ADDRESSABLE (newval) = 1;
+  suppress_warning (newval);
+
+  tree loop_decl = create_artificial_label (loc);
+  tree loop_label = build1 (LABEL_EXPR, void_type_node, loop_decl);
+
+  tree done_decl = create_artificial_label (loc);
+  tree done_label = build1 (LABEL_EXPR, void_type_node, done_decl);
+
+  vec<tree, va_gc> *params;
+  vec_alloc (params, 6);
+
+  /* __atomic_load (addr, &old, SEQ_CST).  */
+  tree fndecl = builtin_decl_explicit (BUILT_IN_ATOMIC_LOAD);
+  params->quick_push (lhs_addr);
+  params->quick_push (old_addr);
+  params->quick_push (build_int_cst (integer_type_node, MEMMODEL_RELAXED));
+  tree func_call = resolve_overloaded_builtin (loc, fndecl, params);
+  if (func_call == NULL_TREE)
+    func_call = build_function_call_vec (loc, vNULL, fndecl, params, NULL);
+  old = build4 (TARGET_EXPR, nonatomic_lhs_type, old, func_call, NULL_TREE,
+		NULL_TREE);
+  add_stmt (old);
+  params->truncate (0);
+
+  /* loop:  */
+  add_stmt (loop_label);
+
+  /* newval = old + val;  */
+  tree rhs;
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      if (!TYPE_OVERFLOW_WRAPS (nonatomic_lhs_type))
+	{
+	  tree utype
+	    = build_bitint_type (TYPE_PRECISION (nonatomic_lhs_type), 1);
+	  rhs = convert (nonatomic_lhs_type,
+			 build2_loc (loc, code, utype,
+				     convert (utype, old),
+				     convert (utype, val)));
+	}
+      else
+	rhs = build2_loc (loc, code, nonatomic_lhs_type, old, val);
+      break;
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      rhs = build2_loc (loc, code, nonatomic_lhs_type, old, val);
+      break;
+    case ERROR_MARK:
+      rhs = build2_loc (loc, BIT_AND_EXPR, nonatomic_lhs_type,
+			build1_loc (loc, BIT_NOT_EXPR,
+				    nonatomic_lhs_type, old), val);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  rhs = build4 (TARGET_EXPR, nonatomic_lhs_type, newval, rhs, NULL_TREE,
+		NULL_TREE);
+  SET_EXPR_LOCATION (rhs, loc);
+  add_stmt (rhs);
+
+  /* if (__atomic_compare_exchange (addr, &old, &new, false, model, model))
+       goto done;  */
+  fndecl = builtin_decl_explicit (BUILT_IN_ATOMIC_COMPARE_EXCHANGE);
+  params->quick_push (lhs_addr);
+  params->quick_push (old_addr);
+  params->quick_push (newval_addr);
+  params->quick_push (integer_zero_node);
+  params->quick_push (model);
+  if (tree_fits_uhwi_p (model)
+      && (tree_to_uhwi (model) == MEMMODEL_RELEASE
+	  || tree_to_uhwi (model) == MEMMODEL_ACQ_REL))
+    params->quick_push (build_int_cst (integer_type_node, MEMMODEL_RELAXED));
+  else
+    params->quick_push (model);
+  func_call = resolve_overloaded_builtin (loc, fndecl, params);
+  if (func_call == NULL_TREE)
+    func_call = build_function_call_vec (loc, vNULL, fndecl, params, NULL);
+
+  tree goto_stmt = build1 (GOTO_EXPR, void_type_node, done_decl);
+  SET_EXPR_LOCATION (goto_stmt, loc);
+
+  tree stmt
+    = build3 (COND_EXPR, void_type_node, func_call, goto_stmt, NULL_TREE);
+  SET_EXPR_LOCATION (stmt, loc);
+  add_stmt (stmt);
+
+  /* goto loop;  */
+  goto_stmt = build1 (GOTO_EXPR, void_type_node, loop_decl);
+  SET_EXPR_LOCATION (goto_stmt, loc);
+  add_stmt (goto_stmt);
+
+  /* done:  */
+  add_stmt (done_label);
+
+  tree ret = create_tmp_var_raw (nonatomic_lhs_type);
+  stmt = build2_loc (loc, MODIFY_EXPR, void_type_node, ret,
+		     return_old_p ? old : newval);
+  add_stmt (stmt);
+
+  /* Finish the compound statement.  */
+  stmts = pop_stmt_list (stmts);
+
+  return build4 (TARGET_EXPR, nonatomic_lhs_type, ret, stmts, NULL_TREE,
+		 NULL_TREE);
+}
+
+
 /* Some builtin functions are placeholders for other expressions.  This
    function should be called immediately after parsing the call expression
    before surrounding code has committed to the type of the expression.
@@ -8030,19 +8261,22 @@ resolve_overloaded_builtin (location_t l
 	/* The following are not _FETCH_OPs and must be accepted with
 	   pointers to _Bool (or C++ bool).  */
 	if (fetch_op)
-	  fetch_op =
-	    (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
-	     && orig_code != BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
-	     && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
-	     && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
+	  fetch_op = (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
+		      && orig_code != BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+		      && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
+		      && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
 
-	int n = sync_resolve_size (function, params, fetch_op);
+	int n = sync_resolve_size (function, params, fetch_op, orig_format);
 	tree new_function, first_param, result;
 	enum built_in_function fncode;
 
 	if (n == 0)
 	  return error_mark_node;
 
+	if (n == -1)
+	  return atomic_bitint_fetch_using_cas_loop (loc, orig_code,
+						     function, params);
+
 	fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
 	new_function = builtin_decl_explicit (fncode);
 	if (!sync_resolve_params (loc, function, new_function, params,
--- gcc/testsuite/gcc.dg/bitint-18.c.jj	2023-08-03 12:26:35.510922996 +0200
+++ gcc/testsuite/gcc.dg/bitint-18.c	2023-08-03 12:26:42.114831050 +0200
@@ -0,0 +1,44 @@
+/* PR c/102989 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+_Atomic _BitInt(15) a;
+_Atomic(_BitInt(15)) b;
+_Atomic _BitInt(115) c;
+_Atomic _BitInt(192) d;
+_Atomic _BitInt(575) e;
+unsigned _BitInt(575) f;
+
+__attribute__((noipa)) _BitInt(575)
+foo (_BitInt(575) x)
+{
+  return x;
+}
+
+__attribute__((noipa)) int
+bar (int x)
+{
+  return x;
+}
+
+__attribute__((noipa)) _Atomic _BitInt(575) *
+baz (_Atomic _BitInt(575) *x)
+{
+  return x;
+}
+
+int
+main ()
+{
+  a += 1wb;
+  b -= 2wb;
+  c *= 3wb;
+  d /= 4wb;
+  e -= 5wb;
+  f = __atomic_fetch_add (&e, 54342985743985743985743895743834298574985734895743895734895wb, __ATOMIC_SEQ_CST);
+  f += __atomic_sub_fetch (&e, 13110356772307144130089534440127211568864891923061809853784155727841516341877716905506658630804426134644404380556711020290072702485839594283061059349912463486203837251238365wb, __ATOMIC_SEQ_CST);
+  f += __atomic_fetch_and (&e, -33740418462630594385361724744395454079240140931656245750192534103967695265126850678980088699287669565365078793986191778469857714756111026776864987769580622009237241167211461wb, __ATOMIC_RELAXED);
+  f += __atomic_xor_fetch (&e, 30799001892772360282132495459823194445423296347702377756575214695893559890977912003055702776548378201752339680602420936304294728688029412276600086349055079523071860836114234wb, __ATOMIC_SEQ_CST);
+  f += __atomic_fetch_or (baz (&e), foo (-6581969867283727911005990155704642154324773504588160884865628865547696324844988049982401783508268917375066790729408659617189350524019843499435572226770089390885472550659255wb), bar (__ATOMIC_RELAXED));
+  f += __atomic_nand_fetch (&e, 55047840194947228224723671648125013926111290688378416557548660662319034233151051252215595447712248992759177463741832904590457754423713378627482465906620631734790561114905369wb, __ATOMIC_ACQ_REL);
+}

	Jakub