> Am 16.06.2023 um 16:34 schrieb Jakub Jelinek <ja...@redhat.com>: > > Hi! > > While the design of these builtins in clang is questionable, > rather than being say > unsigned __builtin_addc (unsigned, unsigned, bool, bool *) > so that it is clear they add two [0, 0xffffffff] range numbers > plus one [0, 1] range carry in and give [0, 0xffffffff] range > return plus [0, 1] range carry out, they actually instead > add 3 [0, 0xffffffff] values together but the carry out > isn't then the expected [0, 2] value because > 0xffffffffULL + 0xffffffff + 0xffffffff is 0x2fffffffd, > but just [0, 1] whether there was any overflow at all. > > It is something used in the wild and shorter to write than the > corresponding > #define __builtin_addc(a,b,carry_in,carry_out) \ > ({ unsigned _s; \ > unsigned _c1 = __builtin_uadd_overflow (a, b, &_s); \ > unsigned _c2 = __builtin_uadd_overflow (_s, carry_in, &_s); \ > *(carry_out) = (_c1 | _c2); \ > _s; }) > and so a canned builtin for something people could often use. > It isn't that hard to maintain on the GCC side, as we just lower > it to two .ADD_OVERFLOW calls early, and the already committed > pattern recognition code can then make .UADDC/.USUBC calls out of > that if the carry in is in [0, 1] range and the corresponding > optab is supported by the target. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok Richard > 2023-06-16 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/79173 > * builtin-types.def (BT_FN_UINT_UINT_UINT_UINT_UINTPTR, > BT_FN_ULONG_ULONG_ULONG_ULONG_ULONGPTR, > BT_FN_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONGPTR): New > types. > * builtins.def (BUILT_IN_ADDC, BUILT_IN_ADDCL, BUILT_IN_ADDCLL, > BUILT_IN_SUBC, BUILT_IN_SUBCL, BUILT_IN_SUBCLL): New builtins. > * builtins.cc (fold_builtin_addc_subc): New function. > (fold_builtin_varargs): Handle BUILT_IN_{ADD,SUB}C{,L,LL}. > * doc/extend.texi (__builtin_addc, __builtin_subc): Document. 
> > * gcc.target/i386/pr79173-11.c: New test. > * gcc.dg/builtin-addc-1.c: New test. > > --- gcc/builtin-types.def.jj 2023-06-16 12:01:09.622759288 +0200 > +++ gcc/builtin-types.def 2023-06-16 12:04:20.277086893 +0200 > @@ -842,10 +842,17 @@ DEF_FUNCTION_TYPE_4 (BT_FN_PTR_PTR_INT_S > BT_PTR, BT_PTR, BT_INT, BT_SIZE, BT_SIZE) > DEF_FUNCTION_TYPE_4 (BT_FN_UINT_UINT_UINT_UINT_UINT, > BT_UINT, BT_UINT, BT_UINT, BT_UINT, BT_UINT) > +DEF_FUNCTION_TYPE_4 (BT_FN_UINT_UINT_UINT_UINT_UINTPTR, > + BT_UINT, BT_UINT, BT_UINT, BT_UINT, BT_PTR_UINT) > DEF_FUNCTION_TYPE_4 (BT_FN_UINT_FLOAT_FLOAT_FLOAT_FLOAT, > BT_UINT, BT_FLOAT, BT_FLOAT, BT_FLOAT, BT_FLOAT) > DEF_FUNCTION_TYPE_4 (BT_FN_ULONG_ULONG_ULONG_UINT_UINT, > BT_ULONG, BT_ULONG, BT_ULONG, BT_UINT, BT_UINT) > +DEF_FUNCTION_TYPE_4 (BT_FN_ULONG_ULONG_ULONG_ULONG_ULONGPTR, > + BT_ULONG, BT_ULONG, BT_ULONG, BT_ULONG, BT_PTR_ULONG) > +DEF_FUNCTION_TYPE_4 > (BT_FN_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONGPTR, > + BT_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG, > + BT_PTR_ULONGLONG) > DEF_FUNCTION_TYPE_4 (BT_FN_STRING_STRING_CONST_STRING_SIZE_SIZE, > BT_STRING, BT_STRING, BT_CONST_STRING, BT_SIZE, BT_SIZE) > DEF_FUNCTION_TYPE_4 (BT_FN_INT_FILEPTR_INT_CONST_STRING_VALIST_ARG, > --- gcc/builtins.def.jj 2023-06-16 12:01:09.622759288 +0200 > +++ gcc/builtins.def 2023-06-16 12:04:20.278086879 +0200 > @@ -934,6 +934,12 @@ DEF_GCC_BUILTIN (BUILT_IN_USUBLL_ > DEF_GCC_BUILTIN (BUILT_IN_UMUL_OVERFLOW, "umul_overflow", > BT_FN_BOOL_UINT_UINT_UINTPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_UMULL_OVERFLOW, "umull_overflow", > BT_FN_BOOL_ULONG_ULONG_ULONGPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_UMULLL_OVERFLOW, "umulll_overflow", > BT_FN_BOOL_ULONGLONG_ULONGLONG_ULONGLONGPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_ADDC, "addc", > BT_FN_UINT_UINT_UINT_UINT_UINTPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_ADDCL, "addcl", > 
BT_FN_ULONG_ULONG_ULONG_ULONG_ULONGPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_ADDCLL, "addcll", > BT_FN_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONGPTR, > ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_SUBC, "subc", > BT_FN_UINT_UINT_UINT_UINT_UINTPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_SUBCL, "subcl", > BT_FN_ULONG_ULONG_ULONG_ULONG_ULONGPTR, ATTR_NOTHROW_NONNULL_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_SUBCLL, "subcll", > BT_FN_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONG_ULONGLONGPTR, > ATTR_NOTHROW_NONNULL_LEAF_LIST) > > /* Category: miscellaneous builtins. */ > DEF_LIB_BUILTIN (BUILT_IN_ABORT, "abort", BT_FN_VOID, > ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST) > --- gcc/builtins.cc.jj 2023-06-13 18:23:37.141794072 +0200 > +++ gcc/builtins.cc 2023-06-16 13:11:25.094406298 +0200 > @@ -9555,6 +9555,51 @@ fold_builtin_arith_overflow (location_t > return build2_loc (loc, COMPOUND_EXPR, boolean_type_node, store, ovfres); > } > > +/* Fold __builtin_{add,sub}c{,l,ll} into pair of internal functions > + that return both result of arithmetics and overflowed boolean > + flag in a complex integer result. 
*/ > + > +static tree > +fold_builtin_addc_subc (location_t loc, enum built_in_function fcode, > + tree *args) > +{ > + enum internal_fn ifn; > + > + switch (fcode) > + { > + case BUILT_IN_ADDC: > + case BUILT_IN_ADDCL: > + case BUILT_IN_ADDCLL: > + ifn = IFN_ADD_OVERFLOW; > + break; > + case BUILT_IN_SUBC: > + case BUILT_IN_SUBCL: > + case BUILT_IN_SUBCLL: > + ifn = IFN_SUB_OVERFLOW; > + break; > + default: > + gcc_unreachable (); > + } > + > + tree type = TREE_TYPE (args[0]); > + tree ctype = build_complex_type (type); > + tree call = build_call_expr_internal_loc (loc, ifn, ctype, 2, > + args[0], args[1]); > + tree tgt = save_expr (call); > + tree intres = build1_loc (loc, REALPART_EXPR, type, tgt); > + tree ovfres = build1_loc (loc, IMAGPART_EXPR, type, tgt); > + call = build_call_expr_internal_loc (loc, ifn, ctype, 2, > + intres, args[2]); > + tgt = save_expr (call); > + intres = build1_loc (loc, REALPART_EXPR, type, tgt); > + tree ovfres2 = build1_loc (loc, IMAGPART_EXPR, type, tgt); > + ovfres = build2_loc (loc, BIT_IOR_EXPR, type, ovfres, ovfres2); > + tree mem_arg3 = build_fold_indirect_ref_loc (loc, args[3]); > + tree store > + = fold_build2_loc (loc, MODIFY_EXPR, void_type_node, mem_arg3, ovfres); > + return build2_loc (loc, COMPOUND_EXPR, type, store, intres); > +} > + > /* Fold a call to __builtin_FILE to a constant string. 
*/ > > static inline tree > @@ -10843,6 +10888,14 @@ fold_builtin_varargs (location_t loc, tr > ret = fold_builtin_fpclassify (loc, args, nargs); > break; > > + case BUILT_IN_ADDC: > + case BUILT_IN_ADDCL: > + case BUILT_IN_ADDCLL: > + case BUILT_IN_SUBC: > + case BUILT_IN_SUBCL: > + case BUILT_IN_SUBCLL: > + return fold_builtin_addc_subc (loc, fcode, args); > + > default: > break; > } > --- gcc/doc/extend.texi.jj 2023-06-16 12:01:15.015683954 +0200 > +++ gcc/doc/extend.texi 2023-06-16 12:40:10.179841300 +0200 > @@ -12839,6 +12839,50 @@ after addition, conditional jump on carr > > @enddefbuiltin > > +@defbuiltin{unsigned int __builtin_addc (unsigned int a, unsigned int b, > unsigned int carry_in, unsigned int *carry_out)} > +@defbuiltinx{unsigned long int __builtin_addcl (unsigned long int a, > unsigned long int b, unsigned long int carry_in, unsigned long int *carry_out)} > +@defbuiltinx{unsigned long long int __builtin_addcll (unsigned long long int > a, unsigned long long int b, unsigned long long int carry_in, unsigned long > long int *carry_out)} > + > +These built-in functions are equivalent to: > +@smallexample > + (@{ __typeof__ (@var{a}) s; \ > + __typeof__ (@var{a}) c1 = __builtin_add_overflow (@var{a}, @var{b}, > &s); \ > + __typeof__ (@var{a}) c2 = __builtin_add_overflow (s, @var{carry_in}, > &s); \ > + *(@var{carry_out}) = c1 | c2; \ > + s; @}) > +@end smallexample > + > +i.e.@: they add 3 unsigned values, set what the last argument > +points to to 1 if either of the two additions overflowed (otherwise 0) > +and return the sum of those 3 unsigned values. Note, while all > +the first 3 arguments can have arbitrary values, better code will be > +emitted if one of them (preferably the third one) has only values > +0 or 1 (i.e.@: carry-in). 
> + > +@enddefbuiltin > + > +@defbuiltin{unsigned int __builtin_subc (unsigned int a, unsigned int b, > unsigned int carry_in, unsigned int *carry_out)} > +@defbuiltinx{unsigned long int __builtin_subcl (unsigned long int a, > unsigned long int b, unsigned long int carry_in, unsigned long int *carry_out)} > +@defbuiltinx{unsigned long long int __builtin_subcll (unsigned long long int > a, unsigned long long int b, unsigned long long int carry_in, unsigned long > long int *carry_out)} > + > +These built-in functions are equivalent to: > +@smallexample > + (@{ __typeof__ (@var{a}) s; \ > + __typeof__ (@var{a}) c1 = __builtin_sub_overflow (@var{a}, @var{b}, > &s); \ > + __typeof__ (@var{a}) c2 = __builtin_sub_overflow (s, @var{carry_in}, > &s); \ > + *(@var{carry_out}) = c1 | c2; \ > + s; @}) > +@end smallexample > + > +i.e.@: they subtract 2 unsigned values from the first unsigned value, > +set what the last argument points to to 1 if either of the two subtractions > +overflowed (otherwise 0) and return the result of the subtractions. > +Note, while all the first 3 arguments can have arbitrary values, better code > +will be emitted if one of them (preferably the third one) has only values > +0 or 1 (i.e.@: carry-in). 
> + > +@enddefbuiltin > + > @node x86 specific memory model extensions for transactional memory > @section x86-Specific Memory Model Extensions for Transactional Memory > > --- gcc/testsuite/gcc.target/i386/pr79173-11.c.jj 2023-06-16 > 12:46:12.659735680 +0200 > +++ gcc/testsuite/gcc.target/i386/pr79173-11.c 2023-06-16 > 12:46:41.864323951 +0200 > @@ -0,0 +1,39 @@ > +/* PR middle-end/79173 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ > +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { > target lp64 } } } */ > +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, > \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, > 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, > 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, > 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, > \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, > 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times 
"sbbl\t%e\[^\n\r]*, > 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, > 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ > + > +void > +foo (unsigned long *p, unsigned long *q) > +{ > + unsigned long c; > + p[0] = __builtin_addcl (p[0], q[0], 0, &c); > + p[1] = __builtin_addcl (p[1], q[1], c, &c); > + p[2] = __builtin_addcl (p[2], q[2], c, &c); > + p[3] = __builtin_addcl (p[3], q[3], c, &c); > +} > + > +void > +bar (unsigned long *p, unsigned long *q) > +{ > + unsigned long c; > + p[0] = __builtin_subcl (p[0], q[0], 0, &c); > + p[1] = __builtin_subcl (p[1], q[1], c, &c); > + p[2] = __builtin_subcl (p[2], q[2], c, &c); > + p[3] = __builtin_subcl (p[3], q[3], c, &c); > +} > --- gcc/testsuite/gcc.dg/builtin-addc-1.c.jj 2023-06-16 12:47:57.247261195 > +0200 > +++ gcc/testsuite/gcc.dg/builtin-addc-1.c 2023-06-16 13:03:19.269260147 > +0200 > @@ -0,0 +1,101 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -g" } */ > + > +int > +main () > +{ > + unsigned int c; > + unsigned long cl; > + unsigned long long cll; > + if (__builtin_addc (1, 42, 0, &c) != 43 || c != 0) > + __builtin_abort (); > + if (__builtin_addc (1, 42, 15, &c) != 58 || c != 0) > + __builtin_abort (); > + if (__builtin_addc (-2U, -3U, -4U, &c) != -9U || c != 1) > + __builtin_abort (); > + if (__builtin_addc (-2U, 1, 0, &c) != -1U || c != 0) > + __builtin_abort (); > + if (__builtin_addc (-2U, 1, 1, &c) != 0 || c != 1) > + __builtin_abort (); > + if (__builtin_addc (-2U, 2, 0, &c) != 0 || c != 1) > + __builtin_abort (); > + if (__builtin_addc (-2U, 0, 2, &c) != 0 || c != 1) > + __builtin_abort (); > + if (__builtin_addcl (1L, 42L, 0L, &cl) != 43 || cl != 0L) > + __builtin_abort (); > + if (__builtin_addcl (1L, 42L, 15L, &cl) != 58 || cl != 0L) > + __builtin_abort (); > + if (__builtin_addcl (-2UL, -3UL, -4UL, &cl) != -9UL || cl != 1L) > + __builtin_abort (); > + if (__builtin_addcl (-2UL, 1L, 0L, &cl) != -1UL || cl != 0L) > + __builtin_abort (); > + 
if (__builtin_addcl (-2UL, 1L, 1L, &cl) != 0 || cl != 1L) > + __builtin_abort (); > + if (__builtin_addcl (-2UL, 2L, 0L, &cl) != 0 || cl != 1L) > + __builtin_abort (); > + if (__builtin_addcl (-2UL, 0L, 2L, &cl) != 0 || cl != 1L) > + __builtin_abort (); > + if (__builtin_addcll (1LL, 42LL, 0LL, &cll) != 43 || cll != 0LL) > + __builtin_abort (); > + if (__builtin_addcll (1LL, 42LL, 15LL, &cll) != 58 || cll != 0LL) > + __builtin_abort (); > + if (__builtin_addcll (-2ULL, -3ULL, -4ULL, &cll) != -9ULL || cll != 1LL) > + __builtin_abort (); > + if (__builtin_addcll (-2ULL, 1LL, 0LL, &cll) != -1ULL || cll != 0LL) > + __builtin_abort (); > + if (__builtin_addcll (-2ULL, 1LL, 1LL, &cll) != 0 || cll != 1LL) > + __builtin_abort (); > + if (__builtin_addcll (-2ULL, 2LL, 0LL, &cll) != 0 || cll != 1LL) > + __builtin_abort (); > + if (__builtin_addcll (-2ULL, 0LL, 2LL, &cll) != 0 || cll != 1LL) > + __builtin_abort (); > + if (__builtin_subc (42, 42, 0, &c) != 0 || c != 0) > + __builtin_abort (); > + if (__builtin_subc (42, 42, 1, &c) != -1U || c != 1) > + __builtin_abort (); > + if (__builtin_subc (1, -3U, -4U, &c) != 8 || c != 1) > + __builtin_abort (); > + if (__builtin_subc (-2U, 1, 0, &c) != -3U || c != 0) > + __builtin_abort (); > + if (__builtin_subc (-2U, -1U, 0, &c) != -1U || c != 1) > + __builtin_abort (); > + if (__builtin_subc (-2U, -2U, 0, &c) != 0 || c != 0) > + __builtin_abort (); > + if (__builtin_subc (-2U, -2U, 1, &c) != -1U || c != 1) > + __builtin_abort (); > + if (__builtin_subc (-2U, 1, -2U, &c) != -1U || c != 1) > + __builtin_abort (); > + if (__builtin_subcl (42L, 42L, 0L, &cl) != 0L || cl != 0L) > + __builtin_abort (); > + if (__builtin_subcl (42L, 42L, 1L, &cl) != -1UL || cl != 1L) > + __builtin_abort (); > + if (__builtin_subcl (1L, -3UL, -4UL, &cl) != 8L || cl != 1L) > + __builtin_abort (); > + if (__builtin_subcl (-2UL, 1L, 0L, &cl) != -3UL || cl != 0L) > + __builtin_abort (); > + if (__builtin_subcl (-2UL, -1UL, 0L, &cl) != -1UL || cl != 1L) > + 
__builtin_abort (); > + if (__builtin_subcl (-2UL, -2UL, 0L, &cl) != 0L || cl != 0L) > + __builtin_abort (); > + if (__builtin_subcl (-2UL, -2UL, 1L, &cl) != -1UL || cl != 1L) > + __builtin_abort (); > + if (__builtin_subcl (-2UL, 1L, -2UL, &cl) != -1UL || cl != 1L) > + __builtin_abort (); > + if (__builtin_subcll (42LL, 42LL, 0LL, &cll) != 0LL || cll != 0LL) > + __builtin_abort (); > + if (__builtin_subcll (42LL, 42LL, 1LL, &cll) != -1ULL || cll != 1LL) > + __builtin_abort (); > + if (__builtin_subcll (1LL, -3ULL, -4ULL, &cll) != 8LL || cll != 1LL) > + __builtin_abort (); > + if (__builtin_subcll (-2ULL, 1LL, 0LL, &cll) != -3ULL || cll != 0LL) > + __builtin_abort (); > + if (__builtin_subcll (-2ULL, -1ULL, 0LL, &cll) != -1ULL || cll != 1LL) > + __builtin_abort (); > + if (__builtin_subcll (-2ULL, -2ULL, 0LL, &cll) != 0LL || cll != 0LL) > + __builtin_abort (); > + if (__builtin_subcll (-2ULL, -2ULL, 1LL, &cll) != -1ULL || cll != 1LL) > + __builtin_abort (); > + if (__builtin_subcll (-2ULL, 1LL, -2ULL, &cll) != -1ULL || cll != 1LL) > + __builtin_abort (); > + return 0; > +} > > Jakub >
Re: [PATCH] builtins: Add support for clang compatible __builtin_{add, sub}c{, l, ll} [PR79173]
Richard Biener via Gcc-patches Fri, 16 Jun 2023 10:06:54 -0700
- [PATCH] builtins: Add support for clang com... Jakub Jelinek via Gcc-patches
- Re: [PATCH] builtins: Add support for ... Richard Biener via Gcc-patches