Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591507.html Thanks
On 10/3/2022 下午 2:31, HAO CHEN GUI wrote: > Hi, > This patch adds V1TI mode into mode iterator used in vector comparison > expands.With the patch, both built-ins and direct comparison could generate > P10 new V1TI comparison instructions. > > Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is > this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-03-09 Haochen Gui <guih...@linux.ibm.com> > > gcc/ > PR target/103316 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable > gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET, > RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT, > RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI. > * config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10 > V1TI instructions. > (vec_cmp<mode><mode>): Set mode iterator to VEC_IC. > (vec_cmpu<mode><mode>): Likewise. > > gcc/testsuite/ > PR target/103316 > * gcc.target/powerpc/pr103316.c: New. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > b/gcc/config/rs6000/rs6000-builtin.cc > index 5d34c1bcfc9..143effa89bf 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -1994,6 +1994,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPEQUH: > case RS6000_BIF_VCMPEQUW: > case RS6000_BIF_VCMPEQUD: > + case RS6000_BIF_VCMPEQUT: > /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple > folding produces worse code for 128-bit compares. */ > fold_compare_helper (gsi, EQ_EXPR, stmt); > @@ -2002,6 +2003,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPNEB: > case RS6000_BIF_VCMPNEH: > case RS6000_BIF_VCMPNEW: > + case RS6000_BIF_VCMPNET: > /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple > folding produces worse code for 128-bit compares. */ > fold_compare_helper (gsi, NE_EXPR, stmt); > @@ -2015,6 +2017,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPGE_U4SI: > case RS6000_BIF_CMPGE_2DI: > case RS6000_BIF_CMPGE_U2DI: > + case RS6000_BIF_CMPGE_1TI: > + case RS6000_BIF_CMPGE_U1TI: > /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI > for now, because gimple folding produces worse code for 128-bit > compares. */ > @@ -2029,6 +2033,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPGTUW: > case RS6000_BIF_VCMPGTUD: > case RS6000_BIF_VCMPGTSD: > + case RS6000_BIF_VCMPGTUT: > + case RS6000_BIF_VCMPGTST: > /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST > for now, because gimple folding produces worse code for 128-bit > compares. */ > @@ -2043,6 +2049,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPLE_U4SI: > case RS6000_BIF_CMPLE_2DI: > case RS6000_BIF_CMPLE_U2DI: > + case RS6000_BIF_CMPLE_1TI: > + case RS6000_BIF_CMPLE_U1TI: > /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI > for now, because gimple folding produces worse code for 128-bit > compares. */ > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index b87a742cca8..1afb8a6d786 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -26,6 +26,9 @@ > ;; Vector int modes > (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) > > +;; Vector int modes for comparison > +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI V1TI]) > + > ;; 128-bit int modes > (define_mode_iterator VEC_TI [V1TI TI]) > > @@ -533,11 +536,12 @@ (define_expand "vcond_mask_<mode><VEC_int>" > > ;; For signed integer vectors comparison. > (define_expand "vec_cmp<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "signed_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > + "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode!= V1TImode) > + || (<MODE>mode == V1TImode && TARGET_POWER10)" > { > enum rtx_code code = GET_CODE (operands[1]); > rtx tmp = gen_reg_rtx (<MODE>mode); > @@ -573,11 +577,12 @@ (define_expand "vec_cmp<mode><mode>" > > ;; For unsigned integer vectors comparison. > (define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "unsigned_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > + "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode != V1TImode) > + || (<MODE>mode == V1TImode && TARGET_POWER10)" > { > enum rtx_code code = GET_CODE (operands[1]); > rtx tmp = gen_reg_rtx (<MODE>mode); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c > b/gcc/testsuite/gcc.target/powerpc/pr103316.c > new file mode 100644 > index 00000000000..da2121a30de > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c > @@ -0,0 +1,79 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> > + > +vector bool __int128 > +test_eq (vector signed __int128 a, vector signed __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +test_ne (vector signed __int128 a, vector signed __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +test_gt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +test_ge (vector signed __int128 a, vector signed __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +test_lt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +test_le (vector signed __int128 a, vector signed __int128 b) > +{ > + return a <= b; > +} > + > +vector bool __int128 > +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a <= b; > +} > + > +/* { dg-final { scan-assembler-times "vcmp" 12 } } */