On Tue, Jan 9, 2024 at 11:48 AM liuhongt <hongtao....@intel.com> wrote:
>
> > I wonder if you can amend the existing patterns instead by iterating
> > over cond/vec_cond.  There are quite some (look for uses of
> > minmax_from_comparison) that could be adapted to vectors.
> >
> > The ones matching the simple form you match are
> >
> > #if GIMPLE
> > /* A >= B ? A : B -> max (A, B) and friends.  The code is still
> >    in fold_cond_expr_with_comparison for GENERIC folding with
> >    some extra constraints.  */
> > (for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
> >  (simplify
> >   (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
> >         (convert3? @0) (convert4? @1))
> >   (if (!HONOR_SIGNED_ZEROS (type)
> > ...
> This pattern is a conditional operation that treats a vector as a complete
> unit; it is more like cbranchm, which is different from vec_cond_expr.
> So I added my patterns after this one.
> >
> > I think.  Consider at least placing the new patterns next to that.
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

OK.

Richard.

> Similar for A < B ? B : A to MAX_EXPR.
> There is code in the front end to optimize such patterns, but it fails to
> handle the testcase in the PR, since the pattern is only exposed at the
> GIMPLE level when folding backend builtins.
>
> pr95906 can now be optimized to MAX_EXPR, as anticipated by the FIXME
> comment in the testcase.
>
> // FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>
> gcc/ChangeLog:
>
>         PR target/104401
>         * match.pd (VEC_COND_EXPR: A < B ? A : B -> MIN_EXPR): New pattern
> match.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr104401.c: New test.
>         * gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
> ---
>  gcc/match.pd                             | 21 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr95906.c  |  3 +--
>  gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
>  3 files changed, 49 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7b4b15acc41..d8e2009a83f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5672,6 +5672,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        (if (VECTOR_TYPE_P (type))
>         (view_convert @c0)
>         (convert @c0))))))))
> +
> +/* This is for VEC_COND_EXPR
> +   Optimize A < B ? A : B to MIN (A, B)
> +           A > B ? A : B to MAX (A, B).  */
> +(for cmp (lt le ungt unge gt ge unlt unle)
> +     minmax (min min min min max max max max)
> +     MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR 
> MAX_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @0 @1)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
> +
> +(for cmp (lt le ungt unge gt ge unlt unle)
> +     minmax (max max max max min min min min)
> +     MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR 
> MIN_EXPR)
> + (simplify
> +  (vec_cond (cmp @0 @1) @1 @0)
> +   (if (VECTOR_INTEGER_TYPE_P (type)
> +       && target_supports_op_p (type, MINMAX, optab_vector))
> +    (minmax @0 @1))))
>  #endif
>
>  (for cnd (cond vec_cond)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> index 3d820a58e93..d15670f3e9e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
>
> -// FIXME: this should further optimize to a MAX_EXPR
>  typedef signed char v16i8 __attribute__((vector_size(16)));
>  v16i8 f(v16i8 a, v16i8 b)
>  {
> @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
>  }
>
>  /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
> -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
> +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c 
> b/gcc/testsuite/gcc.target/i386/pr104401.c
> new file mode 100644
> index 00000000000..8ce7ff88d9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104401.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler-times "pminsd" 2 } } */
> +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
> +
> +#include <smmintrin.h>
> +
> +__m128i min32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
> +}
> +
> +__m128i max32(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
> +}
> +
> +__m128i min32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
> +}
> +
> +__m128i max32_1(__m128i value, __m128i input)
> +{
> +  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
> +}
> +
> --
> 2.31.1
>

Reply via email to