On Tue, Mar 02, 2021 at 08:08:14AM +0100, Andreas Krebbel wrote:
> On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote:
> > As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions
> > x CMP y ? -1 : 0 are fold into x CMP y.  Due to this we do not see
> > shifts anymore after expand in our testcases but comparisons.  Thus
> > replace instructions vesraX by corresponding vchX.  Keep testcases
> > vchX_{lt,gt} where only a relational comparison is done and no shift in
> > order to keep test coverage for vectorization.
> 
> The vcond-shift optimization verified by the testcase is currently 
> implemented in s390_expand_vcond
> but due to the common code change we go the vec_cmp route now. So we probably 
> should do the same
> also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix 
> the testcase for me:
> 
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 9d2cee950d0b..9d9f5a0f6f4e 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
> 
>    if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
>      {
> +      cmp_op2 = force_operand (cmp_op2, 0);
>        switch (cond)
>         {
>           /* NE a != b -> !(a == b) */
> @@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code 
> cond,
>      }
>    else
>      {
> +      /* Turn x < 0 into x >> (bits - )  */
> +      if (cond == LT && cmp_op2 == CONST0_RTX (mode))
> +       {
> +         int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
> +         rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
> +                                        GEN_INT (shift), target,
> +                                        0, OPTAB_DIRECT);
> +         if (res != target)
> +           emit_move_insn (target, res);
> +         return;
> +       }
> +      cmp_op2 = force_operand (cmp_op2, 0);
> +
>        switch (cond)
>         {
>           /* NE: a != b -> !(a == b) */
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index bc52211c55e5..c80d582a300d 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -1589,7 +1589,7 @@
>    [(set (match_operand:<TOINTVEC>  0 "register_operand" "")
>         (match_operator:<TOINTVEC> 1 "vcond_comparison_operator"
>           [(match_operand:V_HW     2 "register_operand" "")
> -          (match_operand:V_HW     3 "register_operand" "")]))]
> +          (match_operand:V_HW     3 "nonmemory_operand" "")]))]
>    "TARGET_VX"
>  {
>    s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
> operands[3]);

Sounds great to me.  Also eliminates the extra vzero :)

Cheers,
Stefan

> 
> Andreas
> 
> 
> > 
> > gcc/testsuite/ChangeLog:
> > 
> >     * gcc.target/s390/vector/vcond-shift.c: Replace vesraX
> >     instructions by corresponding vchX instructions.
> > ---
> >  .../gcc.target/s390/vector/vcond-shift.c      | 31 ++++++++++---------
> >  1 file changed, 17 insertions(+), 14 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
> > b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > index a6b4e97aa50..9e472aef960 100644
> > --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > @@ -3,10 +3,13 @@
> >  /* { dg-do compile { target { s390*-*-* } } } */
> >  /* { dg-options "-O3 -march=z13 -mzarch" } */
> >  
> > -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> > -/* { dg-final { scan-assembler-not "vzero\t*" } } */
> > +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */
> > +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */
> > +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */
> > +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */
> >  /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
> >  /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
> >  /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> > @@ -15,19 +18,19 @@
> >  #define ITER(X) (2 * (16 / sizeof (X[1])))
> >  
> >  void
> > -vesraf_div (int *x)
> > +vchf_vesraf_div (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> >  
> >    /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
> > -     which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
> > +     which in turn should get simplified to (xx - (xx < 0)) >> 1.  */
> >    for (i = 0; i < ITER (xx); i++)
> >      xx[i] = xx[i] / 2;
> >  }
> >  
> >  void
> > -vesrah_div (short *x)
> > +vchh_vesrah_div (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -38,7 +41,7 @@ vesrah_div (short *x)
> >  
> >  
> >  void
> > -vesrab_div (signed char *x)
> > +vchb_vesrab_div (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > @@ -50,7 +53,7 @@ vesrab_div (signed char *x)
> >  
> >  
> >  int
> > -vesraf_lt (int *x)
> > +vchf_lt (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> > @@ -60,7 +63,7 @@ vesraf_lt (int *x)
> >  }
> >  
> >  int
> > -vesrah_lt (short *x)
> > +vchh_lt (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -70,7 +73,7 @@ vesrah_lt (short *x)
> >  }
> >  
> >  int
> > -vesrab_lt (signed char *x)
> > +vchb_lt (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > @@ -82,7 +85,7 @@ vesrab_lt (signed char *x)
> >  
> >  
> >  int
> > -vesraf_ge (int *x)
> > +vchf_ge (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> > @@ -92,7 +95,7 @@ vesraf_ge (int *x)
> >  }
> >  
> >  int
> > -vesrah_ge (short *x)
> > +vchh_ge (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -102,7 +105,7 @@ vesrah_ge (short *x)
> >  }
> >  
> >  int
> > -vesrab_ge (signed char *x)
> > +vchb_ge (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > 
> 

Reply via email to