In the example
void f1 ()
{
for (int i = 0; i < N; i++)
{
b[i] += a[i];
if (a[i] > 0)
break;
}
}
when compiled for SVE we generate:
ld1w z28.s, p7/z, [x4, x0, lsl 2]
cmpgt p14.s, p7/z, z28.s, #0
ptest p15, p14.b
b.none .L3
Where the ptest isn't needed since the branch only cares about the Z and NZ
flags.
GCC today supports eliding this through the pattern *cmp<cmp_op><mode>_ptest;
however, this pattern only supports the removal when the outermost context is a
CMP where the predicate is inside the condition itself.
This typically only happens for an unpredicated CMP as a ptrue will be generated
during expand.
In the case above, at the GIMPLE level we have
mask_patt_14.15_57 = vect__2.11_52 > { 0, ... };
vec_mask_and_58 = loop_mask_48 & mask_patt_14.15_57;
if (vec_mask_and_58 != { 0, ... })
goto <bb 5>; [5.50%]
else
goto <bb 6>; [94.50%]
where the loop mask is applied to the compare as an AND.
The loop mask is moved into the compare by the pattern *cmp<cmp_op><mode>_and
which moves the mask inside if the current mask is a ptrue since
p && true -> p.
However this happens after combine, and so we can't both move the predicate
inside AND eliminate the ptests.
To fix this, this patch adds a new pattern *cmp<cmp_op><mode>_and_ptest which
combines these two patterns together, allowing us to both push the predicate
inside and eliminate the ptest.
After this patch we generate
ld1w z28.s, p7/z, [x4, x0, lsl 2]
cmpgt p14.s, p7/z, z28.s, #0
b.none .L3
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR target/118974
* config/aarch64/aarch64-sve.md (*cmp<cmp_op><mode>_and_ptest): New.
gcc/testsuite/ChangeLog:
PR target/118974
* gcc.target/aarch64/sve/pr119351.c: Update codegen.
* gcc.target/aarch64/sve/vect-early-break-cbranch.c: Likewise.
---
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
}
)
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND in which only the flags result is interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand 5)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3
"aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ (match_operand:<VPRED> 6 "register_operand"))]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:<VPRED> 0))]
+ "TARGET_SVE"
+ {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ]
cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ]
cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[5])"
+ {
+ operands[5] = copy_rtx (operands[4]);
+ }
+)
+
;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND. Split the instruction into its preferred form
;; at the earliest opportunity, in order to get rid of the redundant
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
** ...
** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** ...
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,7 +8,6 @@ int b[N] = {0};
** f1:
** ...
** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -25,7 +24,6 @@ void f1 ()
** f2:
** ...
** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -42,7 +40,6 @@ void f2 ()
** f3:
** ...
** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -59,7 +56,6 @@ void f3 ()
** f4:
** ...
** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -76,7 +72,6 @@ void f4 ()
** f5:
** ...
** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) .L[0-9]+
** ...
*/
@@ -93,7 +88,6 @@ void f5 ()
** f6:
** ...
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
--
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
}
)
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND in which only the flags result is interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand 5)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ (match_operand:<VPRED> 6 "register_operand"))]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:<VPRED> 0))]
+ "TARGET_SVE"
+ {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[5])"
+ {
+ operands[5] = copy_rtx (operands[4]);
+ }
+)
+
;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND. Split the instruction into its preferred form
;; at the earliest opportunity, in order to get rid of the redundant
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
** ...
** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** ...
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,7 +8,6 @@ int b[N] = {0};
** f1:
** ...
** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -25,7 +24,6 @@ void f1 ()
** f2:
** ...
** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -42,7 +40,6 @@ void f2 ()
** f3:
** ...
** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -59,7 +56,6 @@ void f3 ()
** f4:
** ...
** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/
@@ -76,7 +72,6 @@ void f4 ()
** f5:
** ...
** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) .L[0-9]+
** ...
*/
@@ -93,7 +88,6 @@ void f5 ()
** f6:
** ...
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** b.(any|none) \.L[0-9]+
** ...
*/