Hi all,

This patch vectorises __builtin_signbit for V4SF operands by using the unsigned shift right (USHR) vector instruction.
Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:

$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in                      // 37 [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out                     // 40 [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in            // 39 [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out           // 42 [c=4 l=4]  add_losym_di
        mov     x0, 0                       // 3 [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]                // 10 [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648         // 11 [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]                // 16 [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4                   // 17 [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                    // 19 [c=4 l=4]  cmpdi/1
        bne     .L2                         // 20 [c=4 l=4]  condjump
        ret                                 // 50 [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in                      // 36 [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out                     // 39 [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in            // 38 [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out           // 41 [c=4 l=4]  add_losym_di
        mov     x0, 0                       // 3 [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]                // 10 [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31            // 11 [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]                // 15 [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16                  // 16 [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                    // 18 [c=4 l=4]  cmpdi/1
        bne     .L2                         // 19 [c=4 l=4]  condjump
        ret                                 // 49 [c=0 l=4]  *do_return

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-03-20  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * config/aarch64/aarch64-builtins.c
        (aarch64_builtin_vectorized_function): Handle CASE_CFN_SIGNBIT.
        * config/aarch64/aarch64-simd-builtins.def (signbit): Extend to
        V4SF mode.
        * config/aarch64/aarch64-simd.md (signbitv4sf2): New expand.

gcc/testsuite/ChangeLog:

2019-02-28  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, return aarch64_builtin_decls[builtin]; } + CASE_CFN_SIGNBIT: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf]; + else + return NULL_TREE; + } case CFN_BUILT_IN_BSWAP16: #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -324,6 +324,9 @@ VAR1 (UNOP, rint, 2, hf) VAR1 (UNOP, round, 2, hf) + /* Implemented by signbit<mode>2 pattern */ + VAR1 (UNOP, signbit, 2, v4sf) + /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. 
*/ VAR1 (UNOP, lbtruncv4hf, 2, v4hi) VAR1 (UNOP, lbtruncv8hf, 2, v8hi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -915,6 +915,21 @@ [(set_attr "type" "neon_ins<q>")] ) +(define_expand "signbitv4sf2" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V4SF 1 "register_operand"))] + "TARGET_SIMD" +{ + int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1; + rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode, + shift_amount); + operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode); + + emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1], + shift_vector)); + DONE; +}) + (define_insn "aarch64_simd_lshr<mode>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c new file mode 100644 index 0000000000000000000000000000000000000000..aa06a5df1dbb3e295355d485b39963127a828b68 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 --save-temps" } */ + +extern void abort (); + +#define N 1024 +float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; +int out[N]; + +void +foo () +{ + for (int i = 0; i < N; i++) + out[i] = __builtin_signbit (in[i]); +} + +/* { dg-final { scan-assembler-not {-2147483648} } } */ +/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */ + +int +main () +{ + foo (); + + for (int i = 0; i < N; i++) + { + if (in[i] >= 0.0 && out[i]) + abort (); + if (in[i] < 0.0 && !out[i]) + abort (); + } + + return 0; +} +