Hi Kyrill,
> It's been a while, but I believe you had the following comment about
> implementing CSEL:
>
>> (define_insn_and_split "*thumb2_movsicc_insn"
>> [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
>> (if_then_else:SI
>> @@ -449,17 +473,14 @@
>> it\\t%d3\;mvn%d3\\t%0, #%B1
>> #
>> #
>> - #
>> - #
>> - #
>> + ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
>> + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
>> + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
>> #"
>> ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
>> ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
>> - ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
>> - ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
>> - ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
>> ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
>> - "&& reload_completed"
>> + "&& reload_completed && !TARGET_COND_ARITH"
>>
>> Hmm... I think the approach makes sense, but I'd rather we left the
>> alternatives as '#' and refine the condition so that in the
>> TARGET_COND_ARITH case we split in precisely the cases where the
>> TARGET_COND_ARITH can't handle the operands.
>> I appreciate that would complicate this condition somewhat, but it would
>> have the benefit of expressing the RTL structure to allow for further
>> optimisation.
>
> I've made the changes you suggested, let me know if it's good to commit.
>
> (define_insn_and_split "*thumb2_movsicc_insn"
> [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
> (if_then_else:SI
> @@ -459,7 +483,9 @@
> ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
> ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
> ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
> - "&& reload_completed"
> + ; Conditional arithmetic (csel etc.) can handle all alternatives except 8-10
> + "&& reload_completed && (!TARGET_COND_ARITH ||
> + (which_alternative >= 8 && which_alternative <= 10))"
> [(const_int 0)]
> {
> enum rtx_code rev_code;
Here's an alternative implementation that doesn't use which_alternative.
Thanks,
Omar
--
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 950e46edfeeee1b851b8968cbcf071564416dbf6..b8dd6af50a842c924996d528e95ce9873dcb913a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9760,7 +9760,7 @@
[(match_operand:SI 2 "s_register_operand" "r,r")
(match_operand:SI 3 "arm_add_operand" "rI,L")]))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT"
+ "TARGET_32BIT && !TARGET_COND_ARITH"
"#"
"&& reload_completed"
[(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3)))
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 011badc9957655a0fba67946c1db6fa6334b2bbb..57db29f92f4caee4c9384a9740e79dba2217144a 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -36,7 +36,7 @@
;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
;; Rg, Ri
-;; in all states: Pf, Pg
+;; in all states: Pf, Pg, UM, U1
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Up, Uf, Ux, Ul
@@ -479,6 +479,16 @@
(and (match_code "mem")
(match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)")))
+(define_constraint "UM"
+ "@internal
+ A constraint that matches the immediate constant -1."
+ (match_test "op == constm1_rtx"))
+
+(define_constraint "U1"
+ "@internal
+ A constraint that matches the immediate constant +1."
+ (match_test "op == const1_rtx"))
+
(define_memory_constraint "Ux"
"@internal
In ARM/Thumb-2 state a valid address and load into CORE regs or only to
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 2144520829cc4a28cd7ac1ef528ecd54f0af13c1..5d75341c9efe82dcda27daa74d2b22c52065dd02 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -454,6 +454,13 @@
&& arm_general_register_operand (op,
GET_MODE (op))")
(match_test "satisfies_constraint_Pg (op)")))
+(define_predicate "arm_reg_or_m1_or_1_or_zero"
+ (and (match_code "reg,subreg,const_int")
+ (ior (match_operand 0 "arm_general_register_operand")
+ (match_test "op == constm1_rtx")
+ (match_test "op == const1_rtx")
+ (match_test "op == const0_rtx"))))
+
;; True for MULT, to identify which variant of shift_operator is in use.
(define_special_predicate "mult_operator"
(match_code "mult"))
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 69460f3665b0bc7f47c307aa4ae789bab6a94f92..f15f99903dad36e53d9af664c5b3a5e1ebe3b78b 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -432,6 +432,30 @@
(set_attr "type" "multiple")]
)
+(define_insn "*cmovsi_insn"
+ [(set (match_operand:SI 0 "arm_general_register_operand" "=r,r,r,r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (match_operand:SI 3 "arm_reg_or_m1_or_1_or_zero" "r, r,UM, r,U1,U1,Pz,UM,Pz")
+ (match_operand:SI 4 "arm_reg_or_m1_or_1_or_zero" "r,UM, r,U1, r,Pz,U1,Pz,UM")))]
+ "TARGET_THUMB2 && TARGET_COND_ARITH
+ && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
+ || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"
+ "@
+ csel\\t%0, %3, %4, %d1
+ csinv\\t%0, %3, zr, %d1
+ csinv\\t%0, %4, zr, %D1
+ csinc\\t%0, %3, zr, %d1
+ csinc\\t%0, %4, zr, %D1
+ cset\\t%0, %d1
+ cset\\t%0, %D1
+ csetm\\t%0, %d1
+ csetm\\t%0, %D1"
+ [(set_attr "type" "csel")
+ (set_attr "predicable" "no")]
+)
+
(define_insn_and_split "*thumb2_movsicc_insn"
[(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
(if_then_else:SI
@@ -459,7 +483,14 @@
; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
- "&& reload_completed"
+ ; Conditional arithmetic (csel etc.) can handle all alternatives except 8-10
+ "&& reload_completed && (!TARGET_COND_ARITH ||
+ (satisfies_constraint_K (operands[1]) &&
+ !satisfies_constraint_I (operands[1]) &&
+ operands[2] != const0_rtx) ||
+ (satisfies_constraint_K (operands[2]) &&
+ !satisfies_constraint_I (operands[2]) &&
+ operands[1] != const0_rtx))"
[(const_int 0)]
{
enum rtx_code rev_code;
diff --git a/gcc/testsuite/gcc.target/arm/csel.c b/gcc/testsuite/gcc.target/arm/csel.c
new file mode 100644
index 0000000000000000000000000000000000000000..79a4c161eb52b2986c2c2990d2dda3d8c3628782
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csel.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csel32_condasn(int w0, int w1, int w2, int w3)
+{
+ int w4;
+
+ /* { dg-final { scan-assembler "csel\tr\[0-9\]*.*eq" } } */
+ w4 = (w0 == w1) ? w2 : w3;
+ return w4;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cset.c b/gcc/testsuite/gcc.target/arm/cset.c
new file mode 100644
index 0000000000000000000000000000000000000000..e63b7b5041ece7905306876c2c6f9f2f95964951
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cset.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_cset32_condasn1(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "cset\tr\[0-9\]*.*eq" } } */
+ w2 = (w0 == w1) ? 1 : 0;
+ return w2;
+}
+
+int
+test_cset32_condasn2(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "cset\tr\[0-9\]*.*ne" } } */
+ w2 = (w0 == w1) ? 0 : 1;
+ return w2;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csetm.c b/gcc/testsuite/gcc.target/arm/csetm.c
new file mode 100644
index 0000000000000000000000000000000000000000..c04520c2f6514850b299208a477893ee40a02aca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csetm.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csetm32_condasn1(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "csetm\tr\[0-9\]*.*eq" } } */
+ w2 = (w0 == w1) ? -1 : 0;
+ return w2;
+}
+
+int
+test_csetm32_condasn2(int w0, int w1)
+{
+ int w2;
+
+ /* { dg-final { scan-assembler "csetm\tr\[0-9\]*.*ne" } } */
+ w2 = (w0 == w1) ? 0 : -1;
+ return w2;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csinc-2.c b/gcc/testsuite/gcc.target/arm/csinc-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..45e3815eb0c4f2d252e7f0326728dbd0f7debd86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csinc-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csinc32_condasn1(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinc\tr\[0-9\]*.*zr.*eq" } } */
+ w3 = (w0 == w1) ? w2 : 1;
+ return w3;
+}
+
+int
+test_csinc32_condasn2(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinc\tr\[0-9\]*.*zr.*ne" } } */
+ w3 = (w0 == w1) ? 1 : w2;
+ return w3;
+}
diff --git a/gcc/testsuite/gcc.target/arm/csinv-2.c b/gcc/testsuite/gcc.target/arm/csinv-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d55de1b5a3342128cfcb25f48361541d0ae38c06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/csinv-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
+/* { dg-options "-O2 -march=armv8.1-m.main" } */
+
+int
+test_csinv32_condasn1(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinv\tr\[0-9\]*.*zr.*eq" } } */
+ w3 = (w0 == w1) ? w2 : -1;
+ return w3;
+}
+
+int
+test_csinv32_condasn2(int w0, int w1, int w2)
+{
+ int w3;
+
+ /* { dg-final { scan-assembler "csinv\tr\[0-9\]*.*zr.*ne" } } */
+ w3 = (w0 == w1) ? -1 : w2;
+ return w3;
+}
rb13325(1).patch
Description: rb13325(1).patch
