Hi there,
The current RTL patterns for the widening addition and subtraction
instructions in aarch64-simd.md use the code iterator ADDSUB and its
attribute <ADDSUB:optab> to make their definitions more compact.
This approach means that the `minus` and `plus` cases have their operands in
the same order, which causes problems in matching.
The `minus` case needs the more complex operand second to be semantically
correct, but the `plus` case needs the more complex operand first to be in
canonical form.
This patch splits the RTL patterns into two, one for `plus` and one for
`minus` with differing operand order to match their differing requirements.
Ready for trunk?
Bootstrapped and tested on aarch64-none-linux-gnu.
ChangeLog for gcc/testsuite/ChangeLog
2018-07-10 Matthew Malcomson <matthew.malcom...@arm.com>
* gcc.target/aarch64/simd/vect_su_add_sub.c: New test.
ChangeLog for gcc/ChangeLog
2018-07-10 Matthew Malcomson <matthew.malcom...@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>): Split into...
(aarch64_<ANY_EXTEND:su>subw<mode>): ... This...
(aarch64_<ANY_EXTEND:su>addw<mode>): ... And this.
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal): Split into...
(aarch64_<ANY_EXTEND:su>subw<mode>_internal): ... This...
(aarch64_<ANY_EXTEND:su>addw<mode>_internal): ... And this.
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal): Split into...
(aarch64_<ANY_EXTEND:su>subw2<mode>_internal): ... This...
(aarch64_<ANY_EXTEND:su>addw2<mode>_internal): ... And this.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index aac5fa146ed8dde4507a0eb4ad6a07ce78d2f0cd..67b29cbe2cad91e031ee23be656ec61a403f2cf9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3302,38 +3302,78 @@
DONE;
})
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
+;; Widening subtract.  The extended operand has to be the second operand
+;; of the minus, because subtraction does not commute.
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (match_operand:VD_BHSI 2 "register_operand" "w"))))]
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_BHSI 2 "register_operand" "w"))))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_sub_widen")]
)
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+;; Widening subtract of the <VHALF> part of operand 2 selected by
+;; operand 3 (a vect_par_cnst_lo_half).  The extend stays second in the
+;; minus, because subtraction does not commute.
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+ [(set_attr "type" "neon_sub_widen")]
)
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
+;; Widening subtract of the <VHALF> part of operand 2 selected by
+;; operand 3 (a vect_par_cnst_hi_half).  The extend stays second in the
+;; minus, because subtraction does not commute.
+(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_sub_widen")]
+)
+
+;; Widening add.  The extend is written first because RTL canonical form
+;; puts the more complex operand of a plus first.  Operand numbers are
+;; unchanged, so the template still prints %1 before %2.
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_BHSI 2 "register_operand" "w"))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_add_widen")]
+)
+
+;; Widening add of the <VHALF> part of operand 2 selected by operand 3
+;; (a vect_par_cnst_lo_half).  The extend is written first because RTL
+;; canonical form puts the more complex operand of a plus first; operand
+;; numbers are unchanged, so the template still prints %1 before %2.
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+ [(set_attr "type" "neon_add_widen")]
+)
+
+;; Widening add of the <VHALF> part of operand 2 selected by operand 3
+;; (a vect_par_cnst_hi_half).  The extend is written first because RTL
+;; canonical form puts the more complex operand of a plus first; operand
+;; numbers are unchanged, so the template still prints %1 before %2.
+(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_add_widen")]
)
(define_expand "aarch64_saddw2<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vect_su_add_sub.c b/gcc/testsuite/gcc.target/aarch64/simd/vect_su_add_sub.c
new file mode 100644
index 0000000000000000000000000000000000000000..15956ed83fdd5fc8dc895ab1ac4de3f98bc8a625
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vect_su_add_sub.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+/* Ensure we use the signed/unsigned extend vectorized add and sub
+ instructions. */
+#define N 1024
+
+/* The wide arrays are long long, which is 64 bits under both the LP64
+   and ILP32 ABIs; plain long is only 32 bits with -mabi=ilp32.  */
+int a[N];
+long long c[N], d[N];
+unsigned int ua[N];
+unsigned long long uc[N], ud[N];
+
+void
+add ()
+{
+ for (int i = 0; i < N; i++)
+ d[i] = a[i] + c[i]; /* Widening signed add: saddw and saddw2 expected.  */
+}
+
+void
+subtract ()
+{
+ for (int i = 0; i < N; i++)
+ d[i] = c[i] - a[i]; /* Widening signed subtract: ssubw and ssubw2 expected.  */
+}
+
+void
+uadd ()
+{
+ for (int i = 0; i < N; i++)
+ ud[i] = ua[i] + uc[i]; /* Widening unsigned add: uaddw and uaddw2 expected.  */
+}
+
+void
+usubtract ()
+{
+ for (int i = 0; i < N; i++)
+ ud[i] = uc[i] - ua[i]; /* Widening unsigned subtract: usubw and usubw2 expected.  */
+}
+
+/* Ensure exactly
+ one saddw2 and one saddw for the function add (),
+ one ssubw2 and one ssubw for the function subtract (),
+ one uaddw2 and one uaddw for the function uadd (),
+ one usubw2 and one usubw for the function usubtract ().  */
+
+/* { dg-final { scan-assembler-times "\[ \t\]ssubw2\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]ssubw\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]saddw2\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]saddw\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]usubw2\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]usubw\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]uaddw2\[ \t\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]uaddw\[ \t\]+" 1 } } */