Hi All,

This optimizes signed right shift by BITSIZE-1 into a cmlt operation which is
more optimal because generally compares have a higher throughput than shifts.

On AArch64 the result of the shift would have been either -1 or 0 which is the
results of the compare.

i.e.

void e (int * restrict a, int *b, int n)
{
    for (int i = 0; i < n; i++)
      b[i] = a[i] >> 31;
}

now generates:

.L4:
        ldr     q0, [x0, x3]
        cmlt    v0.4s, v0.4s, #0
        str     q0, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

instead of:

.L4:
        ldr     q0, [x0, x3]
        sshr    v0.4s, v0.4s, 31
        str     q0, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (aarch64_simd_ashr<mode>): Add case cmp
        case.
        * config/aarch64/constraints.md (D1): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/shl-combine-2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
300bf001b59ca7fa197c580b10adb7f70f20d1e0..19b2d0ad4dab4d5742698297ded861228ee22007
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1127,12 +1127,14 @@ (define_insn "aarch64_simd_lshr<mode>"
 )
 
 (define_insn "aarch64_simd_ashr<mode>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-                    (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
+                    (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" 
"D1,Dr")))]
  "TARGET_SIMD"
- "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm<q>")]
+ "@
+  cmlt\t%0.<Vtype>, %1.<Vtype>, #0
+  sshr\t%0.<Vtype>, %1.<Vtype>, %2"
+  [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
 )
 
 (define_insn "*aarch64_simd_sra<mode>"
diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 
3b49b452119c49320020fa9183314d9a25b92491..18630815ffc13f2168300a899db69fd428dfb0d6
 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -437,6 +437,14 @@ (define_constraint "Dl"
       (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op),
                                                 true)")))
 
+(define_constraint "D1"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op,
+                       GET_MODE_UNIT_BITSIZE (mode) - 1,
+                       GET_MODE_UNIT_BITSIZE (mode) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c 
b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..bdfe35d09ffccc7928947c9e57f1034f7ca2c798
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+void e (int * restrict a, int *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> 31;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+


-- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 300bf001b59ca7fa197c580b10adb7f70f20d1e0..19b2d0ad4dab4d5742698297ded861228ee22007 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1127,12 +1127,14 @@ (define_insn "aarch64_simd_lshr<mode>"
 )
 
 (define_insn "aarch64_simd_ashr<mode>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
+		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "D1,Dr")))]
  "TARGET_SIMD"
- "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm<q>")]
+ "@
+  cmlt\t%0.<Vtype>, %1.<Vtype>, #0
+  sshr\t%0.<Vtype>, %1.<Vtype>, %2"
+  [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
 )
 
 (define_insn "*aarch64_simd_sra<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3b49b452119c49320020fa9183314d9a25b92491..18630815ffc13f2168300a899db69fd428dfb0d6 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -437,6 +437,14 @@ (define_constraint "Dl"
       (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op),
 						 true)")))
 
+(define_constraint "D1"
+  "@internal
+ A constraint that matches vector of immediates that is bits(mode)-1."
+ (and (match_code "const,const_vector")
+      (match_test "aarch64_const_vec_all_same_in_range_p (op,
+			GET_MODE_UNIT_BITSIZE (mode) - 1,
+			GET_MODE_UNIT_BITSIZE (mode) - 1)")))
+
 (define_constraint "Dr"
   "@internal
  A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..bdfe35d09ffccc7928947c9e57f1034f7ca2c798
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+void e (int * restrict a, int *b, int n)
+{
+    for (int i = 0; i < n; i++)
+      b[i] = a[i] >> 31;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+

Reply via email to