Hi Richard,

Thanks for the feedback. I've updated the patch with the suggested change: aarch64_const_binop now returns 0 for LSHIFT_EXPR when the shift amount is out of range, matching the behaviour of the LSL instruction.
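
For reference, the new check is roughly along these lines (a sketch only, assuming the variable names used in the surrounding aarch64_const_binop code; the attached patch has the exact hunk):

  /* Sketch: in aarch64_const_binop, once ARG1 and ARG2 are known to be
     integer constants.  An out-of-range shift amount folds to 0, matching
     what the LSL instruction produces.  */
  if (code == LSHIFT_EXPR
      && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (TREE_TYPE (arg1))))
    return build_int_cst (TREE_TYPE (arg1), 0);
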
OK for mainline?

Best,
Soumya

> On 14 Oct 2024, at 6:40 PM, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> Soumya AR <soum...@nvidia.com> writes:
>> This patch implements constant folding for svlsl. Test cases have been added
>> to check for the following cases:
>>
>> Zero, merge, and don't care predication.
>> Shift by 0.
>> Shift by register width.
>> Overflow shift on signed and unsigned integers.
>> Shift on a negative integer.
>> Maximum possible shift, e.g. shift by 7 on an 8-bit integer.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
>> OK for mainline?
>>
>> Signed-off-by: Soumya AR <soum...@nvidia.com>
>>
>> gcc/ChangeLog:
>>
>>      * config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
>>      Try constant folding.
>>
>> gcc/testsuite/ChangeLog:
>>
>>      * gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
>>
>> From 0cf5223e51623dcdbc47a06cbd17d927c74094e2 Mon Sep 17 00:00:00 2001
>> From: Soumya AR <soum...@nvidia.com>
>> Date: Tue, 24 Sep 2024 09:09:32 +0530
>> Subject: [PATCH] SVE intrinsics: Fold constant operands for svlsl.
>>
>> This patch implements constant folding for svlsl. Test cases have been added
>> to check for the following cases:
>>
>> Zero, merge, and don't care predication.
>> Shift by 0.
>> Shift by register width.
>> Overflow shift on signed and unsigned integers.
>> Shift on a negative integer.
>> Maximum possible shift, e.g. shift by 7 on an 8-bit integer.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
>> OK for mainline?
>>
>> Signed-off-by: Soumya AR <soum...@nvidia.com>
>>
>> gcc/ChangeLog:
>>
>>      * config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
>>      Try constant folding.
>>
>> gcc/testsuite/ChangeLog:
>>
>>      * gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.
>> ---
>> .../aarch64/aarch64-sve-builtins-base.cc      |  15 +-
>> .../gcc.target/aarch64/sve/const_fold_lsl_1.c | 133 ++++++++++++++++++
>> 2 files changed, 147 insertions(+), 1 deletion(-)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>>
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> index afce52a7e8d..be5d6eae525 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> @@ -1893,6 +1893,19 @@ public:
>>   }
>> };
>>
>> +class svlsl_impl : public rtx_code_function
>> +{
>> +public:
>> +  CONSTEXPR svlsl_impl ()
>> +    : rtx_code_function (ASHIFT, ASHIFT) {}
>> +
>> +  gimple *
>> +  fold (gimple_folder &f) const override
>> +  {
>> +    return f.fold_const_binary (LSHIFT_EXPR);
>> +  }
>> +};
>> +
>
> Sorry for the slow review.  I think we should also make aarch64_const_binop
> return 0 for LSHIFT_EXPR when the shift is out of range, to match the
> behaviour of the underlying instruction.
>
> It looks good otherwise.
>
> Thanks,
> Richard
>
>> class svmad_impl : public function_base
>> {
>> public:
>> @@ -3199,7 +3212,7 @@ FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
>> FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
>> FUNCTION (svldnt1, svldnt1_impl,)
>> FUNCTION (svlen, svlen_impl,)
>> -FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
>> +FUNCTION (svlsl, svlsl_impl,)
>> FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
>> FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
>> FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> new file mode 100644
>> index 00000000000..4299dbd850e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> @@ -0,0 +1,133 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x:
>> +**   mov     z[0-9]+\.d, #20
>> +**   ret
>> +*/
>> +svint64_t s64_x (svbool_t pg) {
>> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_x_vect:
>> +**   mov     z[0-9]+\.d, #20
>> +**   ret
>> +*/
>> +svint64_t s64_x_vect (svbool_t pg) {
>> +    return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_z:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #20
>> +**   ret
>> +*/
>> +svint64_t s64_z (svbool_t pg) {
>> +    return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_z_vect:
>> +**   mov     z[0-9]+\.d, p[0-7]/z, #20
>> +**   ret
>> +*/
>> +svint64_t s64_z_vect (svbool_t pg) {
>> +    return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**   mov     z[0-9]+\.d, #20
>> +**   ret
>> +*/
>> +svint64_t s64_m_ptrue () {
>> +    return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_vect:
>> +**   mov     z[0-9]+\.d, #20
>> +**   ret
>> +*/
>> +svint64_t s64_m_ptrue_vect () {
>> +    return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**   mov     z[0-9]+\.d, #5
>> +**   lsl     z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**   ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg) {
>> +    return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_pg_vect:
>> +**   mov     z[0-9]+\.d, #5
>> +**   lsl     z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**   ret
>> +*/
>> +svint64_t s64_m_pg_vect (svbool_t pg) {
>> +    return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_x_0:
>> +**   mov     z[0-9]+\.d, #5
>> +**   ret
>> +*/
>> +svint64_t s64_x_0 (svbool_t pg) {
>> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_x_bit_width:
>> +**   mov     z[0-9]+\.b, #0
>> +**   ret
>> +*/
>> +svint64_t s64_x_bit_width (svbool_t pg) {
>> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
>> +}
>> +
>> +/*
>> +** u8_x_unsigned_overflow:
>> +**   mov     z[0-9]+\.b, #-2
>> +**   ret
>> +*/
>> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
>> +    return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_signed_overflow:
>> +**   mov     z[0-9]+\.b, #-2
>> +**   ret
>> +*/
>> +svint8_t s8_x_signed_overflow (svbool_t pg) {
>> +    return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_neg_shift:
>> +**   mov     z[0-9]+\.b, #-2
>> +**   ret
>> +*/
>> +svint8_t s8_x_neg_shift (svbool_t pg) {
>> +    return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
>> +}
>> +
>> +/*
>> +** s8_x_max_shift:
>> +**   mov     z[0-9]+\.b, #-128
>> +**   ret
>> +*/
>> +svint8_t s8_x_max_shift (svbool_t pg) {
>> +    return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
>> +}
>> +


Attachment: 0001-SVE-intrinsics-Fold-constant-operands-for-svlsl.patch
