Hi Richard,

Thanks for the feedback. I’ve updated the patch with the suggested change.
Ok for mainline?
Best,
Soumya

> On 14 Oct 2024, at 6:40 PM, Richard Sandiford <richard.sandif...@arm.com>
> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Soumya AR <soum...@nvidia.com> writes:
>> This patch implements constant folding for svlsl. Test cases have been added to
>> check for the following cases:
>>
>> Zero, merge, and don't care predication.
>> Shift by 0.
>> Shift by register width.
>> Overflow shift on signed and unsigned integers.
>> Shift on a negative integer.
>> Maximum possible shift, eg. shift by 7 on an 8-bit integer.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Soumya AR <soum...@nvidia.com>
>>
>> gcc/ChangeLog:
>>
>>         * config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
>>         Try constant folding.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
>>
>> From 0cf5223e51623dcdbc47a06cbd17d927c74094e2 Mon Sep 17 00:00:00 2001
>> From: Soumya AR <soum...@nvidia.com>
>> Date: Tue, 24 Sep 2024 09:09:32 +0530
>> Subject: [PATCH] SVE intrinsics: Fold constant operands for svlsl.
>>
>> This patch implements constant folding for svlsl. Test cases have been added to
>> check for the following cases:
>>
>> Zero, merge, and don't care predication.
>> Shift by 0.
>> Shift by register width.
>> Overflow shift on signed and unsigned integers.
>> Shift on a negative integer.
>> Maximum possible shift, eg. shift by 7 on an 8-bit integer.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Soumya AR <soum...@nvidia.com>
>>
>> gcc/ChangeLog:
>>
>>         * config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
>>         Try constant folding.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
>> ---
>>  .../aarch64/aarch64-sve-builtins-base.cc      |  15 +-
>>  .../gcc.target/aarch64/sve/const_fold_lsl_1.c | 133 ++++++++++++++++++
>>  2 files changed, 147 insertions(+), 1 deletion(-)
>>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>>
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> index afce52a7e8d..be5d6eae525 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> @@ -1893,6 +1893,19 @@ public:
>>    }
>>  };
>>
>> +class svlsl_impl : public rtx_code_function
>> +{
>> +public:
>> +  CONSTEXPR svlsl_impl ()
>> +    : rtx_code_function (ASHIFT, ASHIFT) {}
>> +
>> +  gimple *
>> +  fold (gimple_folder &f) const override
>> +  {
>> +    return f.fold_const_binary (LSHIFT_EXPR);
>> +  }
>> +};
>> +
>
> Sorry for the slow review. I think we should also make aarch64_const_binop
> return 0 for LSHIFT_EXPR when the shift is out of range, to match the
> behaviour of the underlying instruction.
>
> It looks good otherwise.
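
Just to make the intended behaviour explicit: LSL by an amount greater than or
equal to the element width produces zero, so an out-of-range constant shift
should fold to zero rather than being left unfolded. The snippet below is only
a standalone illustration of that semantics, not the actual change to
aarch64_const_binop; the lsl_fold helper is hypothetical, and the asserts
mirror the values expected by the new tests.

#include <cstdint>
#include <cassert>

/* Illustrative only: lsl_fold is a hypothetical helper modelling the LSL
   semantics the constant fold has to match.  A shift amount >= the element
   width produces 0, as the instruction does, instead of being undefined.  */
static uint64_t
lsl_fold (uint64_t value, uint64_t shift, unsigned element_bits)
{
  if (shift >= element_bits)
    return 0;  /* Out-of-range shift folds to zero.  */
  uint64_t mask = element_bits == 64 ? ~0ULL : (1ULL << element_bits) - 1;
  return (value << shift) & mask;  /* Keep only the element's low bits.  */
}

int
main ()
{
  assert (lsl_fold (5, 2, 64) == 20);     /* s64_x: 5 << 2.  */
  assert (lsl_fold (5, 64, 64) == 0);     /* s64_x_bit_width: shift by width.  */
  assert (lsl_fold (255, 1, 8) == 0xfe);  /* u8_x_unsigned_overflow: -2 as u8.  */
  assert (lsl_fold (1, 7, 8) == 0x80);    /* s8_x_max_shift: -128 as s8.  */
  return 0;
}
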
>
> Thanks,
> Richard
>
>>  class svmad_impl : public function_base
>>  {
>>  public:
>> @@ -3199,7 +3212,7 @@ FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
>>  FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
>>  FUNCTION (svldnt1, svldnt1_impl,)
>>  FUNCTION (svlen, svlen_impl,)
>> -FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
>> +FUNCTION (svlsl, svlsl_impl,)
>>  FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
>>  FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
>>  FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> new file mode 100644
>> index 00000000000..4299dbd850e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> @@ -0,0 +1,133 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_x_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x_vect (svbool_t pg) {
>> +  return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_z:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z (svbool_t pg) {
>> +  return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_z_vect:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z_vect (svbool_t pg) {
>> +  return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue () {
>> +  return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue_vect () {
>> +  return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg) {
>> +  return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_pg_vect:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg_vect (svbool_t pg) {
>> +  return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_x_0:
>> +**	mov	z[0-9]+\.d, #5
>> +**	ret
>> +*/
>> +svint64_t s64_x_0 (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_x_bit_width:
>> +**	mov	z[0-9]+\.b, #0
>> +**	ret
>> +*/
>> +svint64_t s64_x_bit_width (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
>> +}
>> +
>> +/*
>> +** u8_x_unsigned_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
>> +  return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_signed_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_signed_overflow (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_neg_shift:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_neg_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
>> +}
>> +
>> +/*
>> +** s8_x_max_shift:
>> +**	mov	z[0-9]+\.b, #-128
>> +**	ret
>> +*/
>> +svint8_t s8_x_max_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
>> +}
>> +
0001-SVE-intrinsics-Fold-constant-operands-for-svlsl.patch