On Tue, Dec 19, 2023 at 6:39 AM liuhongt <hongtao....@intel.com> wrote:
>
> Similar for A < B ? B : A to MAX_EXPR.
> There is code in the frontend to optimize such patterns, but it fails to
> handle the testcase in the PR since the pattern is exposed at the gimple
> level when folding backend builtins.
>
> pr95906 can now be optimized to MAX_EXPR, as noted in the FIXME comment
> in the testcase.
>
> // FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk? (or maybe wait for GCC 15).

I wonder if you can amend the existing patterns instead by iterating
over cond/vec_cond.  There are quite a few (look for uses of
minmax_from_comparison) that could be adapted to vectors.

The ones matching the simple form you match are

#if GIMPLE
/* A >= B ? A : B -> max (A, B) and friends.  The code is still
   in fold_cond_expr_with_comparison for GENERIC folding with
   some extra constraints.  */
(for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
 (simplify
  (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
        (convert3? @0) (convert4? @1))
  (if (!HONOR_SIGNED_ZEROS (type)
...

I think.  Consider at least placing the new patterns next to that.

> gcc/ChangeLog:
>
>         PR target/104401
>         * match.pd (A < B ? A : B -> MIN_EXPR): New pattern match.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr104401.c: New test.
>         * gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
> ---
>  gcc/match.pd                             | 20 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
>  gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
>  3 files changed, 48 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d57e29bfe1d..9584a70aa3d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5263,6 +5263,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       (view_convert:type
>         (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3)))))))
>
> +/* Optimize A < B ? A : B to MIN (A, B)
> +           A > B ? A : B to MAX (A, B).  */
> +(for cmp (lt le gt ge)
> +     minmax (min min max max)
> +     MINMAX (MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @0 @1)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
> +(for cmp (lt le gt ge)
> +     minmax (max max min min)
> +     MINMAX (MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @1 @0)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
>  /* c1 ? c2 ? a : b : b  -->  (c1 & c2) ? a : b  */
>  (simplify
>   (vec_cond @0 (vec_cond:s @1 @2 @3) @3)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> index 3d820a58e93..d15670f3e9e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
>
> -// FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>  {
> @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
>  }
>
>  /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
> -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
> +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c 
> b/gcc/testsuite/gcc.target/i386/pr104401.c
> new file mode 100644
> index 00000000000..8ce7ff88d9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104401.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler-times "pminsd" 2 } } */
> +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
> +
> +#include <smmintrin.h>
> +
> +__m128i min32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
> +}
> +
> +__m128i max32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
> +}
> +
> +__m128i min32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
> +}
> +
> +__m128i max32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
> +}
> +
> --
> 2.31.1
>

Reply via email to