> On 30 Aug 2024, at 14:21, Richard Sandiford <richard.sandif...@arm.com> wrote:
> 
> Jennifer Schmitz <jschm...@nvidia.com> writes:
>> This patch implements constant folding for svmul. If the predicate is
>> ptrue or predication is _x, it uses vector_const_binop with
>> aarch64_const_binop as callback and tree_code MULT_EXPR to fold constant
>> integer operands.
>> Tests were added to check the produced assembly for different
>> predicates, signed and unsigned integers, and the svmul_n_* case.
>> 
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>> 
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>> 
>> gcc/
>>      * config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
>>      Implement and try constant folding.
>> 
>> gcc/testsuite/
>>      * gcc.target/aarch64/sve/const_fold_mul_1.c: New test.
>> 
>> From 648d7bfe4f5dbab734e8823f82b289aa381aafb9 Mon Sep 17 00:00:00 2001
>> From: Jennifer Schmitz <jschm...@nvidia.com>
>> Date: Thu, 29 Aug 2024 05:12:53 -0700
>> Subject: [PATCH 3/3] SVE intrinsics: Fold constant operands for svmul.
>> 
>> This patch implements constant folding for svmul. If the predicate is
>> ptrue or predication is _x, it uses vector_const_binop with
>> aarch64_const_binop as callback and tree_code MULT_EXPR to fold constant
>> integer operands.
>> Tests were added to check the produced assembly for different
>> predicates, signed and unsigned integers, and the svmul_n_* case.
>> 
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>> 
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>> 
>> gcc/
>>      * config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
>>      Implement and try constant folding.
>> 
>> gcc/testsuite/
>>      * gcc.target/aarch64/sve/const_fold_mul_1.c: New test.
>> ---
>> .../aarch64/aarch64-sve-builtins-base.cc      |  29 +-
>> .../gcc.target/aarch64/sve/const_fold_mul_1.c | 292 ++++++++++++++++++
>> 2 files changed, 320 insertions(+), 1 deletion(-)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>> 
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> index 617c7fc87e5..0136fa2fef6 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> @@ -2008,6 +2008,33 @@ public:
>>   }
>> };
>> 
>> +class svmul_impl : public rtx_code_function
>> +{
>> +public:
>> +  CONSTEXPR svmul_impl ()
>> +    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}
>> +
>> +  gimple *
>> +  fold (gimple_folder &f) const override
>> +  {
>> +    tree pg = gimple_call_arg (f.call, 0);
>> +    int step = f.type_suffix (0).element_bytes;
>> +
>> +    /* Try to fold constant integer operands.  */
>> +    if (f.type_suffix (0).integer_p
>> +     && (f.pred == PRED_x || is_ptrue (pg, step)))
>> +      {
>> +     tree op1 = gimple_call_arg (f.call, 1);
>> +     tree op2 = gimple_call_arg (f.call, 2);
>> +     if (tree res = vector_const_binop (MULT_EXPR, op1, op2,
>> +                                        aarch64_const_binop))
>> +       return gimple_build_assign (f.lhs, res);
>> +      }
>> +
>> +    return NULL;
> 
> With the change suggested for 2/3, this would be just:
> 
>  return f.fold_const_binary (MULT_EXPR);
Done.
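With that change, the override in the attached revised patch reduces to roughly
the following sketch (assuming the fold_const_binary helper introduced in patch
2/3):

  class svmul_impl : public rtx_code_function
  {
  public:
    CONSTEXPR svmul_impl ()
      : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}

    gimple *
    fold (gimple_folder &f) const override
    {
      /* Presumably the helper performs the integer and predication
	 (_x or ptrue) checks itself and returns null otherwise
	 (see patch 2/3).  */
      return f.fold_const_binary (MULT_EXPR);
    }
  };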
> 
>> +  }
>> +};
>> +
>> class svnand_impl : public function_base
>> {
>> public:
>> @@ -3192,7 +3219,7 @@ FUNCTION (svmls_lane, svmls_lane_impl,)
>> FUNCTION (svmmla, svmmla_impl,)
>> FUNCTION (svmov, svmov_impl,)
>> FUNCTION (svmsb, svmsb_impl,)
>> -FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
>> +FUNCTION (svmul, svmul_impl,)
>> FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
>> FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
>>                                        UNSPEC_UMUL_HIGHPART, -1))
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>> new file mode 100644
>> index 00000000000..2ab5cfc1b46
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
>> @@ -0,0 +1,292 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x_pg:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_x_pg (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_pg_0:
>> +**   mov     z[0-9]+\.b, #0
>> +**   ret
>> +*/
>> +svint64_t s64_x_pg_0 (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_s64 (0), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_pg:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #15
>> +**   ret
>> +*/
>> +svint64_t s64_z_pg (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_pg_0:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #0
>> +**   ret
>> +*/
>> +svint64_t s64_z_pg_0 (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_s64 (0), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**   mov     (z[0-9]+\.d), #3
>> +**   mov     (z[0-9]+\.d), #5
>> +**   mul     \2, p[0-7]/m, \2, \1
>> +**   ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg)
>> +{
>> +  return svmul_m (pg, svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_x_ptrue ()
>> +{
>> +  return svmul_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_z_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_z_ptrue ()
>> +{
>> +  return svmul_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_m_ptrue ()
>> +{
>> +  return svmul_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
>> +}
>> +
>> +/*
>> +** s64_x_pg_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_x_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_x (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_x_pg_n_s64_0:
>> +**   mov     z[0-9]+\.b, #0
>> +**   ret
>> +*/
>> +svint64_t s64_x_pg_n_s64_0 (svbool_t pg)
>> +{
>> +  return svmul_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_z_pg_n:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #15
>> +**   ret
>> +*/
>> +svint64_t s64_z_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_z (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_z_pg_n_s64_0:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #0
>> +**   ret
>> +*/
>> +svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
>> +{
>> +  return svmul_n_s64_z (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_m_pg_n:
>> +**   mov     (z[0-9]+\.d), #3
>> +**   mov     (z[0-9]+\.d), #5
>> +**   mul     \2, p[0-7]/m, \2, \1
>> +**   ret
>> +*/
>> +svint64_t s64_m_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_s64_m (pg, svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_x_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_x_ptrue_n ()
>> +{
>> +  return svmul_n_s64_x (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_z_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_z_ptrue_n ()
>> +{
>> +  return svmul_n_s64_z (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svint64_t s64_m_ptrue_n ()
>> +{
>> +  return svmul_n_s64_m (svptrue_b64 (), svdup_s64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_x_pg:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_x_pg (svbool_t pg)
>> +{
>> +  return svmul_x (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_z_pg:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_z_pg (svbool_t pg)
>> +{
>> +  return svmul_z (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_m_pg:
>> +**   mov     (z[0-9]+\.d), #3
>> +**   mov     (z[0-9]+\.d), #5
>> +**   mul     \2, p[0-7]/m, \2, \1
>> +**   ret
>> +*/
>> +svuint64_t u64_m_pg (svbool_t pg)
>> +{
>> +  return svmul_m (pg, svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_x_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_x_ptrue ()
>> +{
>> +  return svmul_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_z_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_z_ptrue ()
>> +{
>> +  return svmul_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_m_ptrue:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_m_ptrue ()
>> +{
>> +  return svmul_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
>> +}
>> +
>> +/*
>> +** u64_x_pg_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_x_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_x (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_z_pg_n:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_z_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_z (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_m_pg_n:
>> +**   mov     (z[0-9]+\.d), #3
>> +**   mov     (z[0-9]+\.d), #5
>> +**   mul     \2, p[0-7]/m, \2, \1
>> +**   ret
>> +*/
>> +svuint64_t u64_m_pg_n (svbool_t pg)
>> +{
>> +  return svmul_n_u64_m (pg, svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_x_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_x_ptrue_n ()
>> +{
>> +  return svmul_n_u64_x (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_z_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_z_ptrue_n ()
>> +{
>> +  return svmul_n_u64_z (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
>> +
>> +/*
>> +** u64_m_ptrue_n:
>> +**   mov     z[0-9]+\.d, #15
>> +**   ret
>> +*/
>> +svuint64_t u64_m_ptrue_n ()
>> +{
>> +  return svmul_n_u64_m (svptrue_b64 (), svdup_u64 (5), 3);
>> +}
> 
> Does:
> 
>  svmul_n_u32_x (pg, svindex_u32 (4, 1), 2);
> 
> get optimised to a single INDEX of #8, #2?  It would be good to test
> things like that if so.
> 
> Looks good to me otherwise.
> 
> Thanks,
> Richard
I tested this case, but - unlike svdup - svindex is currently not folded in
gimple, so the test still compiles to separate index and multiply instructions.
Implementing a gimple folder for svindex could be a follow-up patch.
In the meantime, I added an svdupq test, as for svdiv; a rough sketch of that
kind of test is below.
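For illustration only (the operand values and assembly matcher here are
placeholders; the actual test is in the attached patch), the svdupq test is
along these lines:

/*
** s64_x_pg_dupq:
**	mov	z[0-9]+\.d, #15
**	ret
*/
svint64_t s64_x_pg_dupq (svbool_t pg)
{
  return svmul_x (pg, svdupq_s64 (5, 5), svdupq_s64 (3, 3));
}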
Best, Jennifer

Attachment: 0003-SVE-intrinsics-Fold-constant-operands-for-svmul.patch
Description: Binary data

