On Tue, Dec 19, 2023 at 6:39 AM liuhongt <hongtao....@intel.com> wrote: > > Similar for A < B ? B : A to MAX_EXPR. > There is code in the frontend to optimize such patterns, but it fails to > handle the testcase in the PR since the pattern is only exposed at gimple level when > folding backend builtins. > > pr95906 can now be optimized to MAX_EXPR, as noted in the comment in the > testcase. > > // FIXME: this should further optimize to a MAX_EXPR > typedef signed char v16i8 __attribute__((vector_size(16))); > v16i8 f(v16i8 a, v16i8 b) > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? (or maybe wait for GCC 15).
I wonder if you can amend the existing patterns instead by iterating over cond/vec_cond. There are quite a few (look for uses of minmax_from_comparison) that could be adapted to vectors. The ones matching the simple form you match are #if GIMPLE /* A >= B ? A : B -> max (A, B) and friends. The code is still in fold_cond_expr_with_comparison for GENERIC folding with some extra constraints. */ (for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt) (simplify (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1)) (convert3? @0) (convert4? @1)) (if (!HONOR_SIGNED_ZEROS (type) ... I think. Consider at least placing the new patterns next to that. > gcc/ChangeLog: > > PR target/104401 > * match.pd (A < B ? A : B -> MIN_EXPR): New pattern match. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr104401.c: New test. > * gcc.dg/tree-ssa/pr95906.c: Adjust testcase. > --- > gcc/match.pd | 20 ++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr95906.c | 3 +-- > gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++ > 3 files changed, 48 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index d57e29bfe1d..9584a70aa3d 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -5263,6 +5263,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (view_convert:type > (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3))))))) > > +/* Optimize A < B ? A : B to MIN (A, B) > + A > B ? A : B to MAX (A, B). 
*/ > +(for cmp (lt le gt ge) > + minmax (min min max max) > + MINMAX (MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR) > + (simplify > + (vec_cond (cmp @0 @1) @0 @1) > + (if (VECTOR_INTEGER_TYPE_P (type) > + && target_supports_op_p (type, MINMAX, optab_vector)) > + (minmax @0 @1)))) > + > +(for cmp (lt le gt ge) > + minmax (max max min min) > + MINMAX (MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR) > + (simplify > + (vec_cond (cmp @0 @1) @1 @0) > + (if (VECTOR_INTEGER_TYPE_P (type) > + && target_supports_op_p (type, MINMAX, optab_vector)) > + (minmax @0 @1)))) > + > /* c1 ? c2 ? a : b : b --> (c1 & c2) ? a : b */ > (simplify > (vec_cond @0 (vec_cond:s @1 @2 @3) @3) > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > index 3d820a58e93..d15670f3e9e 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > @@ -1,7 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */ > > -// FIXME: this should further optimize to a MAX_EXPR > typedef signed char v16i8 __attribute__((vector_size(16))); > v16i8 f(v16i8 a, v16i8 b) > { > @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b) > } > > /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */ > -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */ > +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c > b/gcc/testsuite/gcc.target/i386/pr104401.c > new file mode 100644 > index 00000000000..8ce7ff88d9e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr104401.c > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse4.1" } */ > +/* { dg-final { scan-assembler-times "pminsd" 2 } } */ > +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */ > + > +#include <smmintrin.h> > + > +__m128i min32(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, 
_mm_cmplt_epi32(value, input)); > +} > + > +__m128i max32(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input)); > +} > + > +__m128i min32_1(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value)); > +} > + > +__m128i max32_1(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value)); > +} > + > -- > 2.31.1 >