Hi Haochen, Some minor comments are inlined.
on 2022/3/10 2:31 PM, HAO CHEN GUI via Gcc-patches wrote: > Hi, > This patch adds V1TI mode into mode iterator used in vector comparison > expands.With the patch, both built-ins and direct comparison could generate > P10 new V1TI comparison instructions. > > Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is > this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-03-09 Haochen Gui <guih...@linux.ibm.com> > > gcc/ > PR target/103316 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable > gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET, > RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT, > RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI. > * config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10 > V1TI instructions. > (vec_cmp<mode><mode>): Set mode iterator to VEC_IC. > (vec_cmpu<mode><mode>): Likewise. > > gcc/testsuite/ > PR target/103316 > * gcc.target/powerpc/pr103316.c: New. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > b/gcc/config/rs6000/rs6000-builtin.cc > index 5d34c1bcfc9..143effa89bf 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -1994,6 +1994,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPEQUH: > case RS6000_BIF_VCMPEQUW: > case RS6000_BIF_VCMPEQUD: > + case RS6000_BIF_VCMPEQUT: > /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple > folding produces worse code for 128-bit compares. */ The comment above is saying why there is no RS6000_BIF_VCMPEQUT before, IIUC the point doesn't hold any more with your patch. So could you remove it to avoid possible confusions? Also some similar places ... > fold_compare_helper (gsi, EQ_EXPR, stmt); > @@ -2002,6 +2003,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPNEB: > case RS6000_BIF_VCMPNEH: > case RS6000_BIF_VCMPNEW: > + case RS6000_BIF_VCMPNET: > /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple > folding produces worse code for 128-bit compares. */ here ... > fold_compare_helper (gsi, NE_EXPR, stmt); > @@ -2015,6 +2017,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPGE_U4SI: > case RS6000_BIF_CMPGE_2DI: > case RS6000_BIF_CMPGE_U2DI: > + case RS6000_BIF_CMPGE_1TI: > + case RS6000_BIF_CMPGE_U1TI: > /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI > for now, because gimple folding produces worse code for 128-bit > compares. */ here... > @@ -2029,6 +2033,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPGTUW: > case RS6000_BIF_VCMPGTUD: > case RS6000_BIF_VCMPGTSD: > + case RS6000_BIF_VCMPGTUT: > + case RS6000_BIF_VCMPGTST: > /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST > for now, because gimple folding produces worse code for 128-bit > compares. */ here... > @@ -2043,6 +2049,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPLE_U4SI: > case RS6000_BIF_CMPLE_2DI: > case RS6000_BIF_CMPLE_U2DI: > + case RS6000_BIF_CMPLE_1TI: > + case RS6000_BIF_CMPLE_U1TI: > /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI > for now, because gimple folding produces worse code for 128-bit > compares. */ here... > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index b87a742cca8..1afb8a6d786 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -26,6 +26,9 @@ > ;; Vector int modes > (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) > > +;; Vector int modes for comparison > +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI V1TI]) > + Maybe we can make this define be like: (define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) ... > ;; 128-bit int modes > (define_mode_iterator VEC_TI [V1TI TI]) > > @@ -533,11 +536,12 @@ (define_expand "vcond_mask_<mode><VEC_int>" > > ;; For signed integer vectors comparison. > (define_expand "vec_cmp<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "signed_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > + "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode!= V1TImode) > + || (<MODE>mode == V1TImode && TARGET_POWER10)" and this condition can be kept as simple with VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)? > { > enum rtx_code code = GET_CODE (operands[1]); > rtx tmp = gen_reg_rtx (<MODE>mode); > @@ -573,11 +577,12 @@ (define_expand "vec_cmp<mode><mode>" > > ;; For unsigned integer vectors comparison. > (define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "unsigned_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > + "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode != V1TImode) > + || (<MODE>mode == V1TImode && TARGET_POWER10)" same as above. > { > enum rtx_code code = GET_CODE (operands[1]); > rtx tmp = gen_reg_rtx (<MODE>mode); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c > b/gcc/testsuite/gcc.target/powerpc/pr103316.c It seems better to have another test case with explicit builtin function calls like vec_cmp{gt, eq, ge ... } to cover the changes in rs6000_gimple_fold_builtin? > new file mode 100644 > index 00000000000..da2121a30de > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c > @@ -0,0 +1,79 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> Nit: I guess this include is useless? BR, Kewen