"Andre Vieira (lists)" <andre.simoesdiasvie...@arm.com> writes: > Hi, > > This patch adds support for the ACLE Data Intrinsics to the AArch64 port. > > Bootstrapped and regression tested on aarch64-none-linux. > > OK for trunk?
Sorry for the slow review. > > gcc/ChangeLog: > > 2022-06-10 Andre Vieira <andre.simoesdiasvie...@arm.com> > > * config/aarch64/aarch64.md (rbit<mode>2): Rename this ... > (@aarch64_rbit<mode>): ... this and change it in... > (ffs<mode>2,ctz<mode>2): ... here. > (@aarch64_rev16<mode>): New. > * config/aarch64/aarch64-builtins.cc: (aarch64_builtins): > Define the following enum AARCH64_REV16, AARCH64_REV16L, > AARCH64_REV16LL, > AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL. > (aarch64_init_data_intrinsics): New. > (handle_arm_acle_h): Add call to aarch64_init_data_intrinsics. > (aarch64_expand_builtin_data_intrinsic): New. > (aarch64_general_expand_builtin): Add call to > aarch64_expand_builtin_data_intrinsic. > * config/aarch64/arm_acle.h (__clz, __clzl, __clzll, __cls, > __clsl, __clsll, __rbit, > __rbitl, __rbitll, __rev, __revl, __revll, __rev16, __rev16l, > __rev16ll, __ror, __rorl, > __rorll, __revsh): New. > > gcc/testsuite/ChangeLog: > > 2022-06-10 Andre Vieira <andre.simoesdiasvie...@arm.com> > > * gcc.target/aarch64/acle/data-intrinsics.c: New test. > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc > b/gcc/config/aarch64/aarch64-builtins.cc > index > e0a741ac663188713e21f457affa57217d074783..91a687dee13a27c21f0c50de9ba777aa900d6096 > 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -613,6 +613,12 @@ enum aarch64_builtins > AARCH64_LS64_BUILTIN_ST64B, > AARCH64_LS64_BUILTIN_ST64BV, > AARCH64_LS64_BUILTIN_ST64BV0, > + AARCH64_REV16, > + AARCH64_REV16L, > + AARCH64_REV16LL, > + AARCH64_RBIT, > + AARCH64_RBITL, > + AARCH64_RBITLL, > AARCH64_BUILTIN_MAX > }; > > @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) > = aarch64_general_add_builtin (data[i].name, data[i].type, > data[i].code); > } > > +static void > +aarch64_init_data_intrinsics (void) > +{ > + tree uint32_fntype = build_function_type_list (uint32_type_node, > + uint32_type_node, NULL_TREE); > + tree long_fntype = build_function_type_list (long_unsigned_type_node, > + long_unsigned_type_node, > + NULL_TREE); Very minor, but ulong_fntype might be clearer, since the other two variable names are explicitly unsigned. > + tree uint64_fntype = build_function_type_list (uint64_type_node, > + uint64_type_node, NULL_TREE); > + aarch64_builtin_decls[AARCH64_REV16] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, > + AARCH64_REV16); > + aarch64_builtin_decls[AARCH64_REV16L] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", long_fntype, > + AARCH64_REV16L); > + aarch64_builtin_decls[AARCH64_REV16LL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", > uint64_fntype, > + AARCH64_REV16LL); > + aarch64_builtin_decls[AARCH64_RBIT] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, > + AARCH64_RBIT); > + aarch64_builtin_decls[AARCH64_RBITL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", long_fntype, > + AARCH64_RBITL); > + aarch64_builtin_decls[AARCH64_RBITLL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, > + AARCH64_RBITLL); > +} > + > /* Implement #pragma GCC aarch64 "arm_acle.h". */ > void > handle_arm_acle_h (void) > { > + aarch64_init_data_intrinsics (); > if (TARGET_LS64) > aarch64_init_ls64_builtins (); > } > @@ -2393,6 +2430,32 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, > rtx target) > emit_insn (pat); > return target; > } > +/* Function to expand an expression EXP which calls one of the ACLE Data > + Intrinsic builtins FCODE with the result going to TARGET. */ > +static rtx > +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx > target) > +{ > + rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); > + machine_mode mode = GET_MODE (op0); > + rtx pat; > + switch (fcode) > + { > + case AARCH64_REV16: > + case AARCH64_REV16L: > + case AARCH64_REV16LL: > + pat = gen_aarch64_rev16 (mode, target, op0); Does this work when op0 is a constant or comes from memory? Same for when target is a memory. E.g. does: void test_rev16 (uint32_t *ptr) { *ptr = __rev16 (*ptr); } work? It'd be more robust to use the expand_insn interface instead; see aarch64_expand_builtin_ls64 for an example. > + break; > + case AARCH64_RBIT: > + case AARCH64_RBITL: > + case AARCH64_RBITLL: > + pat = gen_aarch64_rbit (mode, target, op0); > + break; > + default: > + gcc_unreachable (); > + } > + emit_insn (pat); > + return target; > +} > > /* Expand an expression EXP as fpsr or fpcr setter (depending on > UNSPEC) using MODE. */ > @@ -2551,6 +2614,9 @@ aarch64_general_expand_builtin (unsigned int fcode, > tree exp, rtx target, > if (fcode >= AARCH64_MEMTAG_BUILTIN_START > && fcode <= AARCH64_MEMTAG_BUILTIN_END) > return aarch64_expand_builtin_memtag (fcode, exp, target); > + if (fcode >= AARCH64_REV16 > + && fcode <= AARCH64_RBITLL) > + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > > gcc_unreachable (); > } > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" > rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); > rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); > > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], > const0_rtx)); > DONE; > @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" > [(set_attr "type" "clz")] > ) > > -(define_insn "rbit<mode>2" > +(define_insn "@aarch64_rbit<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r") > (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] > UNSPEC_RBIT))] > "" > @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" > "reload_completed" > [(const_int 0)] > " > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > DONE; > ") > @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" > [(set_attr "type" "rev")] > ) > > +(define_insn "@aarch64_rev16<mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] > + "" > + "rev16\\t%<w>0, %<w>1" > + [(set_attr "type" "rev")]) > + > (define_insn "*aarch64_bfxil<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r,r") > (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h > index > 9775a48c65825b424d3eb442384f5ab87b734fd7..faddd5d0a780c5d65ba430bd3174c701e848c794 > 100644 > --- a/gcc/config/aarch64/arm_acle.h > +++ b/gcc/config/aarch64/arm_acle.h > @@ -28,6 +28,7 @@ > #define _GCC_ARM_ACLE_H > > #include <stdint.h> > +#include <stddef.h> > > #pragma GCC aarch64 "arm_acle.h" > > @@ -35,6 +36,54 @@ > extern "C" { > #endif > > +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +NAME (TYPE value, uint32_t rotate) \ The names of the parameters and local variables need the same __ uglification as __revl. Same for _GCC_ARM_ACLE_DATA_FN. > +{ \ > + size_t size = sizeof (TYPE) * __CHAR_BIT__; \ > + rotate = rotate % size; \ > + return value >> rotate | value << (size - rotate); \ This runs into UB for rotate == 0. > +} > + > +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) Would be good to undef the macro once we're done with it, to reduce noise in things like -dM. > + > +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +__##NAME (TYPE value) \ > +{ \ > + return __builtin_##BUILTIN (value); \ > +} > + > +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t) The ACLE spec says that these should return unsigned int, so I guess either these functions should have their own macro or the macro above should have a separate parameter for the return type. Thanks, Richard > +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t) > +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t) > + > +__extension__ extern __inline unsigned long > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__revl (unsigned long __value) > +{ > + if (sizeof (unsigned long) == 8) > + return __revll (__value); > + else > + return __rev (__value); > +} > + > #pragma GCC push_options > #pragma GCC target ("arch=armv8.3-a") > __extension__ extern __inline int32_t > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > @@ -0,0 +1,215 @@ > +/* Test the ACLE data intrinsics. */ > +/* { dg-do assemble } */ > +/* { dg-additional-options "--save-temps -O1" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include "arm_acle.h" > + > +/* > +** test_clz: > +** clz w0, w0 > +** ret > +*/ > + > +uint32_t test_clz (uint32_t a) > +{ > + return __clz (a); > +} > + > +/* > +** test_clzl: > +** clz [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clzl (unsigned long a) > +{ > + return __clzl (a); > +} > + > +/* > +** test_clzll: > +** clz x0, x0 > +** ret > +*/ > + > +uint64_t test_clzll (uint64_t a) > +{ > + return __clzll (a); > +} > + > +/* > +** test_cls: > +** cls w0, w0 > +** ret > +*/ > + > +uint32_t test_cls (uint32_t a) > +{ > + return __cls (a); > +} > + > +/* > +** test_clsl: > +** cls [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clsl (unsigned long a) > +{ > + return __clsl (a); > +} > + > +/* > +** test_clsll: > +** cls x0, x0 > +** ret > +*/ > + > +uint64_t test_clsll (uint64_t a) > +{ > + return __clsll (a); > +} > + > +/* > +** test_rbit: > +** rbit w0, w0 > +** ret > +*/ > + > +uint32_t test_rbit (uint32_t a) > +{ > + return __rbit (a); > +} > + > +/* > +** test_rbitl: > +** rbit [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rbitl (unsigned long a) > +{ > + return __rbitl (a); > +} > + > +/* > +** test_rbitll: > +** rbit x0, x0 > +** ret > +*/ > + > +uint64_t test_rbitll (uint64_t a) > +{ > + return __rbitll (a); > +} > + > +/* > +** test_rev: > +** rev w0, w0 > +** ret > +*/ > + > +uint32_t test_rev (uint32_t a) > +{ > + return __builtin_bswap32 (a); > +} > + > +/* > +** test_revl: > +** rev [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_revl (unsigned long a) > +{ > + return __revl (a); > +} > + > +/* > +** test_revll: > +** rev x0, x0 > +** ret > +*/ > + > +uint64_t test_revll (uint64_t a) > +{ > + return __revll (a); > +} > + > +/* > +** test_rev16: > +** rev16 w0, w0 > +** ret > +*/ > + > +uint32_t test_rev16 (uint32_t a) > +{ > + return __rev16 (a); > +} > + > +/* > +** test_rev16l: > +** rev16 [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rev16l (unsigned long a) > +{ > + return __rev16l (a); > +} > + > +/* > +** test_rev16ll: > +** rev16 x0, x0 > +** ret > +*/ > + > +uint64_t test_rev16ll (uint64_t a) > +{ > + return __rev16ll (a); > +} > + > +/* > +** test_ror: > +** ror w0, w0, w1 > +** ret > +*/ > + > +uint32_t test_ror (uint32_t a, uint32_t r) > +{ > + return __ror (a, r); > +} > + > +/* > +** test_rorl: > +** ror [wx]0, [wx]0, [wx]1 > +** ret > +*/ > + > +unsigned long test_rorl (unsigned long a, uint32_t r) > +{ > + return __rorl (a, r); > +} > + > +/* > +** test_rorll: > +** ror x0, x0, x1 > +** ret > +*/ > + > +uint64_t test_rorll (uint64_t a, uint32_t r) > +{ > + return __rorll (a, r); > +} > + > +/* > +** test_revsh: > +** rev16 w0, w0 > +** ret > +*/ > + > +int16_t test_revsh (int16_t a) > +{ > + return __revsh (a); > +}