Hi Richard,

> On 23 Oct 2024, at 5:58 PM, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Soumya AR <soum...@nvidia.com> writes:
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> index 41673745cfe..aa556859d2e 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> @@ -1143,11 +1143,14 @@ aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
>>    tree type = TREE_TYPE (arg1);
>>    signop sign = TYPE_SIGN (type);
>>    wi::overflow_type overflow = wi::OVF_NONE;
>> -
>> +  unsigned int element_bytes = tree_to_uhwi (TYPE_SIZE_UNIT (type));
>>    /* Return 0 for division by 0, like SDIV and UDIV do.  */
>>    if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
>>      return arg2;
>> -
>> +  /* Return 0 if shift amount is out of range.  */
>> +  if (code == LSHIFT_EXPR
>> +      && tree_to_uhwi (arg2) >= (element_bytes * BITS_PER_UNIT))
>
> tree_to_uhwi is dangerous because a general shift might be negative
> (even if these particular shift amounts are unsigned).  We should
> probably also key off TYPE_PRECISION rather than TYPE_SIZE_UNIT.  So:
>
>     if (code == LSHIFT_EXPR
>         && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
>
> without the element_bytes variable.  Also: the indentation looks a bit off;
> it should be tabs only followed by spaces only.
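For reference, a minimal sketch of how the revised hunk presumably reads with
that suggestion applied (the attached patch is authoritative; this only shows
the shape of the change):

    /* Return 0 if shift amount is out of range.  wi::geu_p does an
       unsigned wide-int comparison, so a negative shift amount is also
       treated as out of range instead of being passed to tree_to_uhwi,
       which cannot represent it.  */
    if (code == LSHIFT_EXPR
        && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
      return build_int_cst (type, 0);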
Thanks for the feedback, posting an updated patch with the suggested changes.

Thanks,
Soumya

> OK with those changes, thanks.
>
> Richard
>
>
>> +      return build_int_cst (type, 0);
>>    if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
>>      return NULL_TREE;
>>    return force_fit_type (type, poly_res, false,
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> new file mode 100644
>> index 00000000000..6109558001a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> @@ -0,0 +1,142 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_x_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x_vect (svbool_t pg) {
>> +  return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_z:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z (svbool_t pg) {
>> +  return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_z_vect:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z_vect (svbool_t pg) {
>> +  return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue () {
>> +  return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue_vect () {
>> +  return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg) {
>> +  return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_pg_vect:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg_vect (svbool_t pg) {
>> +  return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_x_0:
>> +**	mov	z[0-9]+\.d, #5
>> +**	ret
>> +*/
>> +svint64_t s64_x_0 (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_x_bit_width:
>> +**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
>> +**	ret
>> +*/
>> +svint64_t s64_x_bit_width (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
>> +}
>> +
>> +/*
>> +** s64_x_out_of_range:
>> +**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
>> +**	ret
>> +*/
>> +svint64_t s64_x_out_of_range (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 68);
>> +}
>> +
>> +/*
>> +** u8_x_unsigned_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
>> +  return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_signed_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_signed_overflow (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_neg_shift:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_neg_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
>> +}
>> +
>> +/*
>> +** s8_x_max_shift:
>> +**	mov	z[0-9]+\.b, #-128
>> +**	ret
>> +*/
>> +svint8_t s8_x_max_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
>> +}
>> +
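As a usage-level illustration of what the fold buys (hypothetical function
name; the expected assembly is inferred by analogy with the s64_x test above,
which folds 5 << 2 to a single move of #20):

    #include <arm_sve.h>

    /* 5 << 4 folds to the constant 80 at compile time, so at -O2 this
       should become a single move of #80 into a Z register rather than
       a dup-and-shift sequence.  */
    svint64_t shift_const (svbool_t pg) {
      return svlsl_n_s64_x (pg, svdup_s64 (5), 4);
    }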
0001-SVE-intrinsics-Fold-constant-operands-for-svlsl.patch