luoxhu <luo...@linux.ibm.com> writes:
> This "subtract/extend/add" sequence has existed for a long time and still
> annoys us (PR37451, part of PR61837) when converting from 32 bits to 64 bits,
> as the ctr register is used as 64 bits on powerpc64.  Andrew Pinski had a
> patch, but it caused some issues and was reverted by Joseph S. Myers
> (PR37451, PR37782).
>
> Andrew:
> http://gcc.gnu.org/ml/gcc-patches/2008-09/msg01070.html
> http://gcc.gnu.org/ml/gcc-patches/2008-10/msg01321.html
> Joseph:
> https://gcc.gnu.org/legacy-ml/gcc-patches/2011-11/msg02405.html
>
> We can still do the simplification from "subtract/zero_ext/add" to "zero_ext"
> when the number of loop iterations is known to be less than MODE_MAX (i.e.
> only simplify when counter+0x1 does not overflow).
>
> Bootstrapped and regression tested on Power8-LE.
>
> gcc/ChangeLog
>
>       2020-05-14  Xiong Hu Luo  <luo...@linux.ibm.com>
>
>       PR rtl-optimization/37451, part of PR target/61837
>       * loop-doloop.c (doloop_simplify_count): New function.  Simplify
>       (add -1; zero_ext; add +1) to zero_ext when not wrapping.
>       (doloop_modify): Call doloop_simplify_count.
>
> gcc/testsuite/ChangeLog
>
>       2020-05-14  Xiong Hu Luo  <luo...@linux.ibm.com>
>
>       PR rtl-optimization/37451, part of PR target/61837
>       * gcc.target/powerpc/doloop-2.c: New test.

OK, thanks.

Richard

> ---
>  gcc/loop-doloop.c                           | 38 ++++++++++++++++++++-
>  gcc/testsuite/gcc.target/powerpc/doloop-2.c | 29 ++++++++++++++++
>  2 files changed, 66 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/doloop-2.c
>
> diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c
> index db6a014e43d..02282d45bd5 100644
> --- a/gcc/loop-doloop.c
> +++ b/gcc/loop-doloop.c
> @@ -397,6 +397,42 @@ add_test (rtx cond, edge *e, basic_block dest)
>    return true;
>  }
>  
> +/* Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping.  i.e:
> +
> +   73: r145:SI=r123:DI#0-0x1
> +   74: r144:DI=zero_extend (r145:SI)
> +   75: r143:DI=r144:DI+0x1
> +   ...
> +   31: r135:CC=cmp (r123:DI,0)
> +   72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...}
> +
> +   r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of
> +   loop iterations, if loop iterations expression doesn't overflow, then
> +   (zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend.  */
> +
> +static rtx
> +doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count)
> +{
> +  widest_int iterations;
> +  if (GET_CODE (count) == ZERO_EXTEND)
> +    {
> +      rtx extop0 = XEXP (count, 0);
> +      if (GET_CODE (extop0) == PLUS)
> +     {
> +       rtx addop0 = XEXP (extop0, 0);
> +       rtx addop1 = XEXP (extop0, 1);
> +
> +       if (get_max_loop_iterations (loop, &iterations)
> +           && wi::ltu_p (iterations, GET_MODE_MASK (GET_MODE (addop0)))
> +           && addop1 == constm1_rtx)
> +         return simplify_gen_unary (ZERO_EXTEND, mode, addop0,
> +                                    GET_MODE (addop0));
> +     }
> +    }
> +
> +  return simplify_gen_binary (PLUS, mode, count, const1_rtx);
> +}
> +
>  /* Modify the loop to use the low-overhead looping insn where LOOP
>     describes the loop, DESC describes the number of iterations of the
>     loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
> @@ -477,7 +513,7 @@ doloop_modify (class loop *loop, class niter_desc *desc,
>      }
>  
>    if (increment_count)
> -    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
> +    count = doloop_simplify_count (loop, mode, count);
>  
>    /* Insert initialization of the count register into the loop header.  */
>    start_sequence ();
> diff --git a/gcc/testsuite/gcc.target/powerpc/doloop-2.c b/gcc/testsuite/gcc.target/powerpc/doloop-2.c
> new file mode 100644
> index 00000000000..3199fe56d35
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/doloop-2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-unroll-loops" } */
> +
> +unsigned int
> +foo1 (unsigned int l, int *a)
> +{
> +  unsigned int i;
> +  for(i = 0;i < l; i++)
> +    a[i] = i;
> +  return l;
> +}
> +
> +int
> +foo2 (int l, int *a)
> +{
> +  int i;
> +  for(i = 0;i < l; i++)
> +    a[i] = i;
> +  return l;
> +}
> +
> +/* The place where we were getting an extra -1 is when converting from 32bits
> +   to 64bits as the ctr register is used as 64bits on powerpc64.  We should be
> +   able to do this loop without "add -1/zero_ext/add 1" to the l to get the
> +   number of iterations of this loop still doing a do-loop.  */
> +
> +/* { dg-final { scan-assembler-not {(?n)\maddi .*,.*,-1$} } } */
> +/* { dg-final { scan-assembler-times "bdnz" 2 } } */
> +/* { dg-final { scan-assembler-times "mtctr" 2 } } */

Reply via email to