On Fri, 6 Dec 2019, Jakub Jelinek wrote:

> Hi!
> 
> The following patch canonicalizes fancy ways of writing cond ? A : B
> into COND_EXPR, which is what we expect people to write and thus are
> able to optimize better.  If in some case we didn't optimize it better,
> the right way to fix that would be to improve the COND_EXPR handling,
> as that is what people use in the wild most of the time.
> E.g. on the testcase in the patch on x86_64-linux with -O2, the test
> function used to be 519 bytes long and is now 223 bytes; with -O2
> -march=skylake it was likewise 519 bytes long and is now 275 bytes (in
> that case it uses the SSE4.1+ min/max instructions).
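> 
> To illustrate the identity being undone (just a sketch for the list,
> not part of the patch; the helper name is made up):
> 
>   /* The comparison yields 0 or 1, so -(a < b) is either 0 or -1
>      (all-ones).  The bit_and therefore either zeroes out a - b or
>      keeps it intact:
>        a - ((a - b) & -(a < b)) = a - (a - b) = b   if a < b
>                                 = a - 0       = a   otherwise
>      which is exactly (a < b) ? b : a, i.e. the maximum.  The plus
>      variant works the same way:
>        a + ((b - a) & -(a < b)) = a + (b - a) = b   if a < b
>                                 = a + 0       = a   otherwise.  */
>   static inline unsigned
>   branchless_umax (unsigned a, unsigned b)
>   {
>     return a - ((a - b) & -(a < b));
>   }
> 
> The TYPE_PRECISION/TYPE_UNSIGNED guard in the patterns makes sure the
> -1 mask is still all-ones after any intermediate conversion: a mask
> type narrower than the result type would zero-extend on conversion
> unless it is signed, leaving the high bits clear and breaking the
> identity.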
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-12-06  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/92834
>       * match.pd (A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A,
>       A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): New simplifications.
> 
>       * gcc.dg/tree-ssa/pr92834.c: New test.
> 
> --- gcc/match.pd.jj   2019-12-06 14:07:26.877749065 +0100
> +++ gcc/match.pd      2019-12-06 15:06:08.042953309 +0100
> @@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
>    (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
>    (comb (cmp @0 @2) (cmp @1 @2))))
>  
> +/* Undo fancy way of writing max/min or other ?: expressions,
> +   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
> +   People normally use ?: and that is what we actually try to optimize.  */
> +(for cmp (simple_comparison)
> + (simplify
> +  (minus @0 (bit_and:c (minus @0 @1)
> +                    (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> +        || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> +   (cond (cmp @2 @3) @1 @0)))
> + (simplify
> +  (plus:c @0 (bit_and:c (minus @1 @0)
> +                     (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> +        || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> +   (cond (cmp @2 @3) @1 @0))))
> +
>  /* Simplifications of shift and rotates.  */
>  
>  (for rotate (lrotate rrotate)
> --- gcc/testsuite/gcc.dg/tree-ssa/pr92834.c.jj        2019-12-06 15:24:56.817353747 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr92834.c   2019-12-06 15:24:08.921100518 +0100
> @@ -0,0 +1,122 @@
> +/* PR tree-optimization/92834 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR <" 8 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "MAX_EXPR <" 8 "optimized" } } */
> +
> +static inline unsigned
> +umax1 (unsigned a, unsigned b)
> +{
> +  return a - ((a - b) & -(a < b));
> +}
> +
> +static inline unsigned
> +umin1 (unsigned a, unsigned b)
> +{
> +  return a - ((a - b) & -(a > b));
> +}
> +
> +static inline int
> +smax1 (int a, int b)
> +{
> +  return a - ((a - b) & -(a < b));
> +}
> +
> +static inline int
> +smin1 (int a, int b)
> +{
> +  return a - ((a - b) & -(a > b));
> +}
> +
> +static inline unsigned long long
> +umax2 (unsigned long long a, unsigned long long b)
> +{
> +  return a - ((a - b) & -(a <= b));
> +}
> +
> +static inline unsigned long long
> +umin2 (unsigned long long a, unsigned long long b)
> +{
> +  return a - ((a - b) & -(a >= b));
> +}
> +
> +static inline long long
> +smax2 (long long a, long long b)
> +{
> +  return a - ((a - b) & -(a <= b));
> +}
> +
> +static inline long long
> +smin2 (long long a, long long b)
> +{
> +  return a - ((a - b) & -(a >= b));
> +}
> +
> +static inline unsigned
> +umax3 (unsigned a, unsigned b)
> +{
> +  return a + ((b - a) & -(a < b));
> +}
> +
> +static inline unsigned
> +umin3 (unsigned a, unsigned b)
> +{
> +  return a + ((b - a) & -(a > b));
> +}
> +
> +static inline int
> +smax3 (int a, int b)
> +{
> +  return a + ((b - a) & -(a < b));
> +}
> +
> +static inline int
> +smin3 (int a, int b)
> +{
> +  return a + ((b - a) & -(a > b));
> +}
> +
> +static inline unsigned long long
> +umax4 (unsigned long long a, unsigned long long b)
> +{
> +  return a + ((b - a) & -(a <= b));
> +}
> +
> +static inline unsigned long long
> +umin4 (unsigned long long a, unsigned long long b)
> +{
> +  return a + ((b - a) & -(a >= b));
> +}
> +
> +static inline long long
> +smax4 (long long a, long long b)
> +{
> +  return a + ((b - a) & -(a <= b));
> +}
> +
> +static inline long long
> +smin4 (long long a, long long b)
> +{
> +  return a + ((b - a) & -(a >= b));
> +}
> +
> +void
> +test (unsigned *x, int *y, unsigned long long *z, long long *w)
> +{
> +  x[2] = umax1 (x[0], x[1]);
> +  x[5] = umin1 (x[2], x[3]);
> +  y[2] = smax1 (y[0], y[1]);
> +  y[5] = smin1 (y[2], y[3]);
> +  z[2] = umax2 (z[0], z[1]);
> +  z[5] = umin2 (z[2], z[3]);
> +  w[2] = smax2 (w[0], w[1]);
> +  w[5] = smin2 (w[2], w[3]);
> +  x[8] = umax3 (x[6], x[7]);
> +  x[11] = umin3 (x[9], x[10]);
> +  y[8] = smax3 (y[6], y[7]);
> +  y[11] = smin3 (y[9], y[10]);
> +  z[8] = umax4 (z[6], z[7]);
> +  z[11] = umin4 (z[9], z[10]);
> +  w[8] = smax4 (w[6], w[7]);
> +  w[11] = smin4 (w[9], w[10]);
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
