Soumya AR <soum...@nvidia.com> writes:
> This patch implements constant folding for svlsl. Test cases have been added
> to check for the following cases:
>
> Zero, merge, and don't care predication.
> Shift by 0.
> Shift by register width.
> Overflow shift on signed and unsigned integers.
> Shift on a negative integer.
> Maximum possible shift, eg. shift by 7 on an 8-bit integer.
>
> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
> OK for mainline?
>
> Signed-off-by: Soumya AR <soum...@nvidia.com>
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
> 	Try constant folding.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
>
> From 0cf5223e51623dcdbc47a06cbd17d927c74094e2 Mon Sep 17 00:00:00 2001
> From: Soumya AR <soum...@nvidia.com>
> Date: Tue, 24 Sep 2024 09:09:32 +0530
> Subject: [PATCH] SVE intrinsics: Fold constant operands for svlsl.
>
> This patch implements constant folding for svlsl. Test cases have been added
> to check for the following cases:
>
> Zero, merge, and don't care predication.
> Shift by 0.
> Shift by register width.
> Overflow shift on signed and unsigned integers.
> Shift on a negative integer.
> Maximum possible shift, eg. shift by 7 on an 8-bit integer.
>
> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
> OK for mainline?
>
> Signed-off-by: Soumya AR <soum...@nvidia.com>
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
> 	Try constant folding.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
> ---
>  .../aarch64/aarch64-sve-builtins-base.cc      |  15 +-
>  .../gcc.target/aarch64/sve/const_fold_lsl_1.c | 133 ++++++++++++++++++
>  2 files changed, 147 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index afce52a7e8d..be5d6eae525 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -1893,6 +1893,19 @@ public:
>    }
>  };
>
> +class svlsl_impl : public rtx_code_function
> +{
> +public:
> +  CONSTEXPR svlsl_impl ()
> +    : rtx_code_function (ASHIFT, ASHIFT) {}
> +
> +  gimple *
> +  fold (gimple_folder &f) const override
> +  {
> +    return f.fold_const_binary (LSHIFT_EXPR);
> +  }
> +};
> +
Sorry for the slow review.

I think we should also make aarch64_const_binop return 0 for LSHIFT_EXPR
when the shift is out of range, to match the behaviour of the underlying
instruction.  (A rough, untested sketch of what I mean is appended after
the quoted patch below.)

It looks good otherwise.

Thanks,
Richard

>  class svmad_impl : public function_base
>  {
>  public:
> @@ -3199,7 +3212,7 @@ FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
>  FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
>  FUNCTION (svldnt1, svldnt1_impl,)
>  FUNCTION (svlen, svlen_impl,)
> -FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
> +FUNCTION (svlsl, svlsl_impl,)
>  FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
>  FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
>  FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
> new file mode 100644
> index 00000000000..4299dbd850e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
> @@ -0,0 +1,133 @@
> +/* { dg-final { check-function-bodies "**" "" } } */
> +/* { dg-options "-O2" } */
> +
> +#include "arm_sve.h"
> +
> +/*
> +** s64_x:
> +**	mov	z[0-9]+\.d, #20
> +**	ret
> +*/
> +svint64_t s64_x (svbool_t pg) {
> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
> +}
> +
> +/*
> +** s64_x_vect:
> +**	mov	z[0-9]+\.d, #20
> +**	ret
> +*/
> +svint64_t s64_x_vect (svbool_t pg) {
> +  return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
> +}
> +
> +/*
> +** s64_z:
> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
> +**	ret
> +*/
> +svint64_t s64_z (svbool_t pg) {
> +  return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
> +}
> +
> +/*
> +** s64_z_vect:
> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
> +**	ret
> +*/
> +svint64_t s64_z_vect (svbool_t pg) {
> +  return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
> +}
> +
> +/*
> +** s64_m_ptrue:
> +**	mov	z[0-9]+\.d, #20
> +**	ret
> +*/
> +svint64_t s64_m_ptrue () {
> +  return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
> +}
> +
> +/*
> +** s64_m_ptrue_vect:
> +**	mov	z[0-9]+\.d, #20
> +**	ret
> +*/
> +svint64_t s64_m_ptrue_vect () {
> +  return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
> +}
> +
> +/*
> +** s64_m_pg:
> +**	mov	z[0-9]+\.d, #5
> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
> +**	ret
> +*/
> +svint64_t s64_m_pg (svbool_t pg) {
> +  return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
> +}
> +
> +/*
> +** s64_m_pg_vect:
> +**	mov	z[0-9]+\.d, #5
> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
> +**	ret
> +*/
> +svint64_t s64_m_pg_vect (svbool_t pg) {
> +  return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
> +}
> +
> +/*
> +** s64_x_0:
> +**	mov	z[0-9]+\.d, #5
> +**	ret
> +*/
> +svint64_t s64_x_0 (svbool_t pg) {
> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
> +}
> +
> +/*
> +** s64_x_bit_width:
> +**	mov	z[0-9]+\.b, #0
> +**	ret
> +*/
> +svint64_t s64_x_bit_width (svbool_t pg) {
> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
> +}
> +
> +/*
> +** u8_x_unsigned_overflow:
> +**	mov	z[0-9]+\.b, #-2
> +**	ret
> +*/
> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
> +  return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
> +}
> +
> +/*
> +** s8_x_signed_overflow:
> +**	mov	z[0-9]+\.b, #-2
> +**	ret
> +*/
> +svint8_t s8_x_signed_overflow (svbool_t pg) {
> +  return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
> +}
> +
> +/*
> +** s8_x_neg_shift:
> +**	mov	z[0-9]+\.b, #-2
> +**	ret
> +*/
> +svint8_t s8_x_neg_shift (svbool_t pg) {
> +  return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
> +}
> +
> +/*
> +** s8_x_max_shift:
> +**	mov	z[0-9]+\.b, #-128
> +**	ret
> +*/
> +svint8_t s8_x_max_shift (svbool_t pg) {
> +  return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
> +}
> +
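For what it's worth, the kind of thing I had in mind for aarch64_const_binop
is roughly the following.  This is only an illustrative, untested sketch: it
assumes the check sits next to that function's existing early-out special
cases, and that arg1/arg2 are the constant operands and code the tree code
passed to it.

  /* Hypothetical addition inside aarch64_const_binop (untested sketch).
     If the shift amount is greater than or equal to the element width,
     LSL produces zero, so fold to zero rather than leaving the call
     unfolded.  */
  if (code == LSHIFT_EXPR
      && wi::geu_p (wi::to_wide (arg2), element_precision (arg1)))
    return build_int_cst (TREE_TYPE (arg1), 0);

That would also keep the fold consistent with the s64_x_bit_width test above,
which expects a zero result for a shift by 64.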