On Tue, Mar 02, 2021 at 08:08:14AM +0100, Andreas Krebbel wrote: > On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote: > > As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions > > x CMP y ? -1 : 0 are fold into x CMP y. Due to this we do not see > > shifts anymore after expand in our testcases but comparisons. Thus > > replace instructions vesraX by corresponding vchX. Keep testcases > > vchX_{lt,gt} where only a relational comparison is done and no shift in > > order to keep test coverage for vectorization. > > The vcond-shift optimization verified by the testcase is currently > implemented in s390_expand_vcond > but due to the common code change we go the vec_cmp route now. So we probably > should do the same > also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix > the testcase for me: > > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > index 9d2cee950d0b..9d9f5a0f6f4e 100644 > --- a/gcc/config/s390/s390.c > +++ b/gcc/config/s390/s390.c > @@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, > > if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT) > { > + cmp_op2 = force_operand (cmp_op2, 0); > switch (cond) > { > /* NE a != b -> !(a == b) */ > @@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code > cond, > } > else > { > + /* Turn x < 0 into x >> (bits - ) */ > + if (cond == LT && cmp_op2 == CONST0_RTX (mode)) > + { > + int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1; > + rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1, > + GEN_INT (shift), target, > + 0, OPTAB_DIRECT); > + if (res != target) > + emit_move_insn (target, res); > + return; > + } > + cmp_op2 = force_operand (cmp_op2, 0); > + > switch (cond) > { > /* NE: a != b -> !(a == b) */ > diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md > index bc52211c55e5..c80d582a300d 100644 > --- a/gcc/config/s390/vector.md > +++ b/gcc/config/s390/vector.md > @@ -1589,7 +1589,7 @@ > [(set (match_operand:<TOINTVEC> 0 "register_operand" "") > (match_operator:<TOINTVEC> 1 "vcond_comparison_operator" > [(match_operand:V_HW 2 "register_operand" "") > - (match_operand:V_HW 3 "register_operand" "")]))] > + (match_operand:V_HW 3 "nonmemory_operand" "")]))] > "TARGET_VX" > { > s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], > operands[3]);
Sounds great to me. Also eliminates the extra vzero :) Cheers, Stefan > > Andreas > > > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/s390/vector/vcond-shift.c: Replace vesraX > > instructions by corresponding vchX instructions. > > --- > > .../gcc.target/s390/vector/vcond-shift.c | 31 ++++++++++--------- > > 1 file changed, 17 insertions(+), 14 deletions(-) > > > > diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > > b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > > index a6b4e97aa50..9e472aef960 100644 > > --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > > +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > > @@ -3,10 +3,13 @@ > > /* { dg-do compile { target { s390*-*-* } } } */ > > /* { dg-options "-O3 -march=z13 -mzarch" } */ > > > > -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */ > > -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */ > > -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */ > > -/* { dg-final { scan-assembler-not "vzero\t*" } } */ > > +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */ > > +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */ > > +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */ > > +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */ > > +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */ > > +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */ > > +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */ > > /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */ > > /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */ > > /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */ > > @@ -15,19 +18,19 @@ > > #define ITER(X) (2 * (16 / sizeof (X[1]))) > > > > void > > -vesraf_div (int *x) > > +vchf_vesraf_div (int *x) > > { > > int i; > > int *xx = __builtin_assume_aligned (x, 8); > > > > /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1 > > - which in turn should get simplified to (xx + (xx >> 31)) >> 1. */ > > + which in turn should get simplified to (xx - (xx < 0)) >> 1. */ > > for (i = 0; i < ITER (xx); i++) > > xx[i] = xx[i] / 2; > > } > > > > void > > -vesrah_div (short *x) > > +vchh_vesrah_div (short *x) > > { > > int i; > > short *xx = __builtin_assume_aligned (x, 8); > > @@ -38,7 +41,7 @@ vesrah_div (short *x) > > > > > > void > > -vesrab_div (signed char *x) > > +vchb_vesrab_div (signed char *x) > > { > > int i; > > signed char *xx = __builtin_assume_aligned (x, 8); > > @@ -50,7 +53,7 @@ vesrab_div (signed char *x) > > > > > > int > > -vesraf_lt (int *x) > > +vchf_lt (int *x) > > { > > int i; > > int *xx = __builtin_assume_aligned (x, 8); > > @@ -60,7 +63,7 @@ vesraf_lt (int *x) > > } > > > > int > > -vesrah_lt (short *x) > > +vchh_lt (short *x) > > { > > int i; > > short *xx = __builtin_assume_aligned (x, 8); > > @@ -70,7 +73,7 @@ vesrah_lt (short *x) > > } > > > > int > > -vesrab_lt (signed char *x) > > +vchb_lt (signed char *x) > > { > > int i; > > signed char *xx = __builtin_assume_aligned (x, 8); > > @@ -82,7 +85,7 @@ vesrab_lt (signed char *x) > > > > > > int > > -vesraf_ge (int *x) > > +vchf_ge (int *x) > > { > > int i; > > int *xx = __builtin_assume_aligned (x, 8); > > @@ -92,7 +95,7 @@ vesraf_ge (int *x) > > } > > > > int > > -vesrah_ge (short *x) > > +vchh_ge (short *x) > > { > > int i; > > short *xx = __builtin_assume_aligned (x, 8); > > @@ -102,7 +105,7 @@ vesrah_ge (short *x) > > } > > > > int > > -vesrab_ge (signed char *x) > > +vchb_ge (signed char *x) > > { > > int i; > > signed char *xx = __builtin_assume_aligned (x, 8); > > >