> On 24 Oct 2024, at 10:39, Soumya AR <soum...@nvidia.com> wrote:
> 
> Hi Richard,
> 
> > On 23 Oct 2024, at 5:58 PM, Richard Sandiford <richard.sandif...@arm.com> wrote:
> > 
> > Soumya AR <soum...@nvidia.com> writes:
> >> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
> >> index 41673745cfe..aa556859d2e 100644
> >> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> >> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> >> @@ -1143,11 +1143,14 @@ aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
> >>       tree type = TREE_TYPE (arg1);
> >>       signop sign = TYPE_SIGN (type);
> >>       wi::overflow_type overflow = wi::OVF_NONE;
> >> -
> >> +      unsigned int element_bytes = tree_to_uhwi (TYPE_SIZE_UNIT (type));
> >>       /* Return 0 for division by 0, like SDIV and UDIV do.  */
> >>       if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
> >>      return arg2;
> >> -
> >> +      /* Return 0 if shift amount is out of range. */
> >> +      if (code == LSHIFT_EXPR
> >> +               && tree_to_uhwi (arg2) >= (element_bytes * BITS_PER_UNIT))
> > 
> > tree_to_uhwi is dangerous because a general shift might be negative
> > (even if these particular shift amounts are unsigned).  We should
> > probably also key off TYPE_PRECISION rather than TYPE_SIZE_UNIT.  So:
> > 
> >        if (code == LSHIFT_EXPR
> >            && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
> > 
> > without the element_bytes variable.  Also: the indentation looks a bit off;
> > it should be tabs only followed by spaces only.
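> > 
> > Putting those changes together, the fixed hunk would read something
> > like this (a sketch; context lines taken from the patch above):
> > 
> >       /* Return 0 for division by 0, like SDIV and UDIV do.  */
> >       if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
> >         return arg2;
> >       /* Return 0 if the shift amount is out of range.  */
> >       if (code == LSHIFT_EXPR
> >           && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
> >         return build_int_cst (type, 0);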
> 
> Thanks for the feedback; I'm posting an updated patch with the suggested changes.

Thanks Soumya, I’ve pushed this patch to trunk as commit 3e7549ece7c after
adjusting the ChangeLog slightly to start the lines with tabs instead of spaces.
Kyrill

> 
> Thanks,
> Soumya
> 
> > OK with those changes, thanks.
> > 
> > Richard
> > 
> > 
> >> +     return build_int_cst (type, 0);
> >>       if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
> >>      return NULL_TREE;
> >>       return force_fit_type (type, poly_res, false,
> >> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
> >> new file mode 100644
> >> index 00000000000..6109558001a
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
> >> @@ -0,0 +1,142 @@
> >> +/* { dg-final { check-function-bodies "**" "" } } */
> >> +/* { dg-options "-O2" } */
> >> +
> >> +#include "arm_sve.h"
> >> +
> >> +/*
> >> +** s64_x:
> >> +**   mov     z[0-9]+\.d, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_x (svbool_t pg) {
> >> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
> >> +}
> >> +
> >> +/*
> >> +** s64_x_vect:
> >> +**   mov     z[0-9]+\.d, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_x_vect (svbool_t pg) {
> >> +    return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
> >> +}
> >> +
> >> +/*
> >> +** s64_z:
> >> +**   mov     z[0-9]+\.d, p[0-7]/z, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_z (svbool_t pg) {
> >> +    return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
> >> +}
> >> +
> >> +/*
> >> +** s64_z_vect:
> >> +**   mov     z[0-9]+\.d, p[0-7]/z, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_z_vect (svbool_t pg) {
> >> +    return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
> >> +}
> >> +
> >> +/*
> >> +** s64_m_ptrue:
> >> +**   mov     z[0-9]+\.d, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_m_ptrue () {
> >> +    return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
> >> +}
> >> +
> >> +/*
> >> +** s64_m_ptrue_vect:
> >> +**   mov     z[0-9]+\.d, #20
> >> +**   ret
> >> +*/
> >> +svint64_t s64_m_ptrue_vect () {
> >> +    return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
> >> +}
> >> +
> >> +/*
> >> +** s64_m_pg:
> >> +**   mov     z[0-9]+\.d, #5
> >> +**   lsl     z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
> >> +**   ret
> >> +*/
> >> +svint64_t s64_m_pg (svbool_t pg) {
> >> +    return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
> >> +}
> >> +
> >> +/*
> >> +** s64_m_pg_vect:
> >> +**   mov     z[0-9]+\.d, #5
> >> +**   lsl     z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
> >> +**   ret
> >> +*/
> >> +svint64_t s64_m_pg_vect (svbool_t pg) {
> >> +    return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
> >> +}
> >> +
> >> +/*
> >> +** s64_x_0:
> >> +**   mov     z[0-9]+\.d, #5
> >> +**   ret
> >> +*/
> >> +svint64_t s64_x_0 (svbool_t pg) {
> >> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
> >> +}
> >> +
> >> +/*
> >> +** s64_x_bit_width:
> >> +**   movi?   [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
> >> +**   ret
> >> +*/
> >> +svint64_t s64_x_bit_width (svbool_t pg) {
> >> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
> >> +}
> >> +
> >> +/*
> >> +** s64_x_out_of_range:
> >> +**   movi?   [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
> >> +**   ret
> >> +*/
> >> +svint64_t s64_x_out_of_range (svbool_t pg) {
> >> +    return svlsl_n_s64_x (pg, svdup_s64 (5), 68);
> >> +}
> >> +
> >> +/*
> >> +** u8_x_unsigned_overflow:
> >> +**   mov     z[0-9]+\.b, #-2
> >> +**   ret
> >> +*/
> >> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
> >> +    return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
> >> +}
> >> +
> >> +/*
> >> +** s8_x_signed_overflow:
> >> +**   mov     z[0-9]+\.b, #-2
> >> +**   ret
> >> +*/
> >> +svint8_t s8_x_signed_overflow (svbool_t pg) {
> >> +    return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
> >> +}
> >> +
> >> +/*
> >> +** s8_x_neg_shift:
> >> +**   mov     z[0-9]+\.b, #-2
> >> +**   ret
> >> +*/
> >> +svint8_t s8_x_neg_shift (svbool_t pg) {
> >> +    return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
> >> +}
> >> +
> >> +/*
> >> +** s8_x_max_shift:
> >> +**   mov     z[0-9]+\.b, #-128
> >> +**   ret
> >> +*/
> >> +svint8_t s8_x_max_shift (svbool_t pg) {
> >> +    return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
> >> +}
> >> +
> 
> <0001-SVE-intrinsics-Fold-constant-operands-for-svlsl.patch>
