Hi all,

This patch vectorises __builtin_signbit for V4SF inputs by using the
vector unsigned shift right instruction (USHR by 31).

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in  // 37   [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out // 40   [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in        // 39   [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out       // 42   [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]    // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]    // 16   [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4       // 17   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 19   [c=4 l=4]  cmpdi/1
        bne     .L2             // 20   [c=4 l=4]  condjump
        ret             // 50   [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in  // 36   [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out // 39   [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in        // 38   [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out       // 41   [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]    // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]    // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16      // 16   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 18   [c=4 l=4]  cmpdi/1
        bne     .L2             // 19   [c=4 l=4]  condjump
        ret             // 49   [c=0 l=4]  *do_return

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-03-20  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * config/aarch64/aarch64-builtins.c
        (aarch64_builtin_vectorized_function): Add CASE_CFN_SIGNBIT.
        * config/aarch64/aarch64-simd-builtins.def (signbit): Extend to
        V4SF mode.
        * config/aarch64/aarch64-simd.md (signbitv4sf2): New expand.

gcc/testsuite/ChangeLog:

2019-02-28  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
 
        return aarch64_builtin_decls[builtin];
       }
+    CASE_CFN_SIGNBIT:
+      {
+       if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+         return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf];
+       else
+         return NULL_TREE;
+      }
     case CFN_BUILT_IN_BSWAP16:
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -324,6 +324,9 @@
   VAR1 (UNOP, rint, 2, hf)
   VAR1 (UNOP, round, 2, hf)
 
+  /* Implemented by signbit<mode>2 pattern.  */
+  VAR1 (UNOP, signbit, 2, v4sf)
+
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
   VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
   VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -915,6 +915,21 @@
   [(set_attr "type" "neon_ins<q>")]
 )
 
+(define_expand "signbitv4sf2"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (use (match_operand:V4SF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  /* View the V4SF input as V4SI and logically shift every lane right by
+     its width minus one, so that only the top (sign) bit remains.  */
+  int top_bit = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+  rtx amount = aarch64_simd_gen_const_vector_dup (V4SImode, top_bit);
+  rtx src = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], src, amount));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+/* Check that __builtin_signbit on float is vectorised as a vector
+   unsigned shift right by 31 rather than a scalar AND with the
+   sign-bit mask.  */
+
+extern void abort (void);
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+/* Store the signbit of every element of IN into OUT.  */
+
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+\.4s, v[0-9]+\.4s, 31} } } */
+
+/* Run foo and verify OUT: zero for inputs >= 0, nonzero for inputs < 0.  */
+
+int
+main (void)
+{
+  foo ();
+
+  for (int i = 0; i < N; i++)
+  {
+    if (in[i] >= 0.0 && out[i])
+      abort ();
+    if (in[i] < 0.0 && !out[i])
+      abort ();
+  }
+
+  return 0;
+}

Reply via email to