> On 30 Aug 2024, at 14:21, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Jennifer Schmitz <jschm...@nvidia.com> writes:
>> This patch implements constant folding for svmul. If the predicate is
>> ptrue or predication is _x, it uses vector_const_binop with
>> aarch64_const_binop as callback and tree_code MULT_EXPR to fold constant
>> integer operands.
>> Tests were added to check the produced assembly for different
>> predicates, signed and unsigned integers, and the svmul_n_* case.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>>
>> gcc/
>> 	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
>> 	Implement and try constant folding.
>>
>> gcc/testsuite/
>> 	* gcc.target/aarch64/sve/const_fold_mul_1.c: New test.
>>
>> From 648d7bfe4f5dbab734e8823f82b289aa381aafb9 Mon Sep 17 00:00:00 2001
>> From: Jennifer Schmitz <jschm...@nvidia.com>
>> Date: Thu, 29 Aug 2024 05:12:53 -0700
>> Subject: [PATCH 3/3] SVE intrinsics: Fold constant operands for svmul.
>>
>> This patch implements constant folding for svmul. If the predicate is
>> ptrue or predication is _x, it uses vector_const_binop with
>> aarch64_const_binop as callback and tree_code MULT_EXPR to fold constant
>> integer operands.
>> Tests were added to check the produced assembly for different
>> predicates, signed and unsigned integers, and the svmul_n_* case.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>>
>> gcc/
>> 	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
>> 	Implement and try constant folding.
>>
>> gcc/testsuite/
>> 	* gcc.target/aarch64/sve/const_fold_mul_1.c: New test.
>> ---
>>  .../aarch64/aarch64-sve-builtins-base.cc      |  29 +-
>>  .../gcc.target/aarch64/sve/const_fold_mul_1.c | 292 ++++++++++++++++++
>>  2 files changed, 320 insertions(+), 1 deletion(-)
>>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>>
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> index 617c7fc87e5..0136fa2fef6 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> @@ -2008,6 +2008,33 @@ public:
>>    }
>>  };
>>
>> +class svmul_impl : public rtx_code_function
>> +{
>> +public:
>> +  CONSTEXPR svmul_impl ()
>> +    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}
>> +
>> +  gimple *
>> +  fold (gimple_folder &f) const override
>> +  {
>> +    tree pg = gimple_call_arg (f.call, 0);
>> +    int step = f.type_suffix (0).element_bytes;
>> +
>> +    /* Try to fold constant integer operands.  */
>> +    if (f.type_suffix (0).integer_p
>> +        && (f.pred == PRED_x || is_ptrue (pg, step)))
>> +      {
>> +        tree op1 = gimple_call_arg (f.call, 1);
>> +        tree op2 = gimple_call_arg (f.call, 2);
>> +        if (tree res = vector_const_binop (MULT_EXPR, op1, op2,
>> +                                           aarch64_const_binop))
>> +          return gimple_build_assign (f.lhs, res);
>> +      }
>> +
>> +    return NULL;
>
> With the change suggested for 2/3, this would be just:
>
>   return f.fold_const_binary (MULT_EXPR);

Done.
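For reference, a minimal sketch of what the override collapses to with the
fold_const_binary helper from patch 2/3 (assuming that helper takes over the
PRED_x/ptrue and integer-type checks; the constructor is unchanged):

  class svmul_impl : public rtx_code_function
  {
  public:
    CONSTEXPR svmul_impl ()
      : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}

    gimple *
    fold (gimple_folder &f) const override
    {
      /* Delegate constant folding of integer operands to the shared
         helper suggested for 2/3.  */
      return f.fold_const_binary (MULT_EXPR);
    }
  };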
>
>> +  }
>> +};
>> +
>>  class svnand_impl : public function_base
>>  {
>>  public:
>> @@ -3192,7 +3219,7 @@ FUNCTION (svmls_lane, svmls_lane_impl,)
>>  FUNCTION (svmmla, svmmla_impl,)
>>  FUNCTION (svmov, svmov_impl,)
>>  FUNCTION (svmsb, svmsb_impl,)
>> -FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
>> +FUNCTION (svmul, svmul_impl,)
>>  FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
>>  FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
>>                                            UNSPEC_UMUL_HIGHPART, -1))
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>> new file mode 100644
>> index 00000000000..2ab5cfc1b46
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>> @@ -0,0 +1,292 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x_pg:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_x_pg (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_pg_0:
>> +**	mov	z[0-9]+\.b, #0
>> +**	ret
>> +*/
>> +svint64_t s64_x_pg_0 (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_s64 (0), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_pg:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #15
>> +**	ret
>> +*/
>> +svint64_t s64_z_pg (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_pg_0:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #0
>> +**	ret
>> +*/
>> +svint64_t s64_z_pg_0 (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_s64 (0), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**	mov	(z[0-9]+\.d), #3
>> +**	mov	(z[0-9]+\.d), #5
>> +**	mul	\2, p[0-7]/m, \2, \1
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg)
>> +{
>> +  return svmul_m (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_x_ptrue ()
>> +{
>> +  return svmul_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_z_ptrue ()
>> +{
>> +  return svmul_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue ()
>> +{
>> +  return svmul_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_pg_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_x_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_x (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_x_pg_n_s64_0:
>> +**	mov	z[0-9]+\.b, #0
>> +**	ret
>> +*/
>> +svint64_t s64_x_pg_n_s64_0 (svbool_t pg)
>> +{
>> +  return svmul_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_z_pg_n:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #15
>> +**	ret
>> +*/
>> +svint64_t s64_z_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_z (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_z_pg_n_s64_0:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #0
>> +**	ret
>> +*/
>> +svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
>> +{
>> +  return svmul_n_s64_z (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_m_pg_n:
>> +**	mov	(z[0-9]+\.d), #3
>> +**	mov	(z[0-9]+\.d), #5
>> +**	mul	\2, p[0-7]/m, \2, \1
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_m (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_x_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_x_ptrue_n ()
>> +{
>> +  return svmul_n_s64_x (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_z_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_z_ptrue_n ()
>> +{
>> +  return svmul_n_s64_z (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue_n ()
>> +{
>> +  return svmul_n_s64_m (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_x_pg:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_x_pg (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_z_pg:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_z_pg (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_m_pg:
>> +**	mov	(z[0-9]+\.d), #3
>> +**	mov	(z[0-9]+\.d), #5
>> +**	mul	\2, p[0-7]/m, \2, \1
>> +**	ret
>> +*/
>> +svuint64_t u64_m_pg (svbool_t pg)
>> +{
>> +  return svmul_m (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_x_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_x_ptrue ()
>> +{
>> +  return svmul_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_z_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_z_ptrue ()
>> +{
>> +  return svmul_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_m_ptrue:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_m_ptrue ()
>> +{
>> +  return svmul_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_x_pg_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_x_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_x (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_z_pg_n:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_z_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_z (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_m_pg_n:
>> +**	mov	(z[0-9]+\.d), #3
>> +**	mov	(z[0-9]+\.d), #5
>> +**	mul	\2, p[0-7]/m, \2, \1
>> +**	ret
>> +*/
>> +svuint64_t u64_m_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_m (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_x_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_x_ptrue_n ()
>> +{
>> +  return svmul_n_u64_x (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_z_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_z_ptrue_n ()
>> +{
>> +  return svmul_n_u64_z (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_m_ptrue_n:
>> +**	mov	z[0-9]+\.d, #15
>> +**	ret
>> +*/
>> +svuint64_t u64_m_ptrue_n ()
>> +{
>> +  return svmul_n_u64_m (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
>
> Does:
>
>   svmul_n_u32_x (pg, svindex_u32 (4, 1), 2);
>
> get optimised to a single INDEX of #8, #2?  It would be good to test
> things like that if so.
>
> Looks good to me otherwise.
>
> Thanks,
> Richard
I tried this test case, but unlike svdup, svindex is currently not folded in
gimple, so the example still compiles to separate INDEX and MUL instructions.
Implementing a gimple_folder for svindex could be a follow-up patch. In the
meantime, I added an svdupq test, as for svdiv.

Best,
Jennifer
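P.S. For illustration only, a non-uniform constant case in the style of the
existing tests could look roughly like the sketch below. The function name and
constants are made up here, the expected-assembly pattern is omitted (the
instructions used to materialise the folded vector may vary), and this is not
necessarily the exact svdupq test that was added:

svint64_t s64_x_pg_dupq (svbool_t pg)
{
  /* svdupq_s64 (5, 3) is a constant vector { 5, 3, 5, 3, ... }, so the
     multiplication should fold at gimple level to { 15, 9, 15, 9, ... }
     and no MUL should remain in the generated code.  */
  return svmul_n_s64_x (pg, svdupq_s64 (5, 3), 3);
}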
Attachment: 0003-SVE-intrinsics-Fold-constant-operands-for-svmul.patch