Hi all,

Richard raised a concern about the RTL we use to represent the AdvSIMD SABD
(vector signed absolute difference) instruction.
We currently represent it as ABS (MINUS op1 op2).

This isn't exactly what SABD does. ABS treats its input as a signed value
and returns the absolute of that.

For example:
(sabd:QI 64 -128) == 192 (unsigned) aka -64 (signed)
whereas
(minus:QI 64 -128) == 192 (unsigned) aka -64 (signed), (abs ...) of that is 64.

A better way to describe the instruction is with MINUS (SMAX (op1 op2) SMIN 
(op1 op2)).
This patch implements that, and also implements similar semantics for the UABD 
instruction
that uses UMAX and UMIN.

That way for the example above we'll have:
(minus:QI (smax:QI (64 -128)) (smin:QI (64 -128))) == (minus:QI 64 -128) == 192 
(or -64 signed) which matches
what SABD does.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2019-04-02  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/aarch64/iterators.md (max_opp): New code_attr.
    (USMAX): New code iterator.
    * config/aarch64/predicates.md (aarch64_smin): New predicate.
    (aarch64_smax): Likewise.
    * config/aarch64/aarch64-simd.md (abd<mode>_3): Rename to...
    (*aarch64_<su>abd<mode>_3): ... Change RTL representation to
    MINUS (MAX MIN).

2019-04-02  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * gcc.target/aarch64/abd_1.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 96360d877a404a6795aef5458345e87de9006cf4..a54ed551f4fef0ccae8181b81d0acbd9e327edd8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -705,13 +705,17 @@ (define_insn "aarch64_abs<mode>"
   [(set_attr "type" "neon_abs<q>")]
 )
 
-(define_insn "abd<mode>_3"
+(define_insn "*aarch64_<su>abd<mode>_3"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-	(abs:VDQ_BHSI (minus:VDQ_BHSI
-		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
-		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
-  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+	(minus:VDQ_BHSI
+	  (USMAX:VDQ_BHSI
+	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
+	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+	  (match_operator 3 "aarch64_<max_opp>"
+	    [(match_dup 1)
+	     (match_dup 2)])))]
+  "TARGET_SIMD"
+  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
   [(set_attr "type" "neon_abd<q>")]
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e9ede2934a43539279ae68049fb66b84114d90ea..16e4dbda73ab928054590c47a4398408162c0332 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1056,6 +1056,9 @@ (define_mode_attr f16quad [(V2SF "") (V4SF "q")])
 
 (define_code_attr f16mac [(plus "a") (minus "s")])
 
+;; Map smax to smin and umax to umin.
+(define_code_attr max_opp [(smax "smin") (umax "umin")])
+
 ;; The number of subvectors in an SVE_STRUCT.
 (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
 				(VNx8SI  "2") (VNx4DI  "2")
@@ -1204,6 +1207,9 @@ (define_code_iterator MAXMIN [smax smin umax umin])
 
 (define_code_iterator FMAXMIN [smax smin])
 
+;; Signed and unsigned max operations.
+(define_code_iterator USMAX [smax umax])
+
 ;; Code iterator for variants of vector max and min.
 (define_code_iterator ADDSUB [plus minus])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 855cf7b52f840a81e0bbd2b31fd59fc860060626..b8e6d232ff6237a58addda1ec0e953a1054dc616 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -319,6 +319,12 @@ (define_predicate "aarch64_reg_or_imm"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "const_scalar_int_operand")))
 
+(define_predicate "aarch64_smin"
+  (match_code "smin"))
+
+(define_predicate "aarch64_umin"
+  (match_code "umin"))
+
 ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ.
 (define_special_predicate "aarch64_comparison_operator"
   (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
diff --git a/gcc/testsuite/gcc.target/aarch64/abd_1.c b/gcc/testsuite/gcc.target/aarch64/abd_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a27cb5ef0c12cd0068ff1728e20cce7f68eddc2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/abd_1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 1024
+
+#define FUNC(T)								\
+void									\
+sabd_##T (signed T * restrict a, signed T * restrict b,		\
+		signed T * restrict out)				\
+{									\
+  for (int i = 0; i < N; i++)						\
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);			\
+}									\
+									\
+void									\
+uabd_##T (unsigned T * restrict a, unsigned T * restrict b,	\
+		  unsigned T * restrict out)				\
+{									\
+  for (int i = 0; i < N; i++)						\
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);			\
+}
+
+FUNC(char)
+FUNC(short)
+FUNC(int)
+
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */

Reply via email to