Hi Richard,

> On 23 Oct 2024, at 5:58 PM, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Soumya AR <soum...@nvidia.com> writes:
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> index 41673745cfe..aa556859d2e 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> @@ -1143,11 +1143,14 @@ aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
>>    tree type = TREE_TYPE (arg1);
>>    signop sign = TYPE_SIGN (type);
>>    wi::overflow_type overflow = wi::OVF_NONE;
>> -
>> +  unsigned int element_bytes = tree_to_uhwi (TYPE_SIZE_UNIT (type));
>>    /* Return 0 for division by 0, like SDIV and UDIV do.  */
>>    if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
>>      return arg2;
>> -
>> +  /* Return 0 if shift amount is out of range.  */
>> +  if (code == LSHIFT_EXPR
>> +      && tree_to_uhwi (arg2) >= (element_bytes * BITS_PER_UNIT))
>
> tree_to_uhwi is dangerous because a general shift might be negative
> (even if these particular shift amounts are unsigned).  We should
> probably also key off TYPE_PRECISION rather than TYPE_SIZE_UNIT.  So:
>
>     if (code == LSHIFT_EXPR
>         && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
>
> without the element_bytes variable.  Also: the indentation looks a bit off;
> it should be tabs only followed by spaces only.
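For reference, a minimal sketch of how the revised hunk presumably reads with
that suggestion applied (the attached patch is authoritative; this only shows
the shape of the change):

    /* Return 0 if shift amount is out of range.  wi::geu_p does an
       unsigned wide-int comparison, so a negative shift amount is also
       treated as out of range instead of being passed to tree_to_uhwi,
       which cannot represent it.  */
    if (code == LSHIFT_EXPR
        && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
      return build_int_cst (type, 0);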
Thanks for the feedback, posting an updated patch with the suggested changes.

Thanks,
Soumya

> OK with those changes, thanks.
>
> Richard
>
>
>> +      return build_int_cst (type, 0);
>>    if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
>>      return NULL_TREE;
>>    return force_fit_type (type, poly_res, false,
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> new file mode 100644
>> index 00000000000..6109558001a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
>> @@ -0,0 +1,142 @@
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "arm_sve.h"
>> +
>> +/*
>> +** s64_x:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_x_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_x_vect (svbool_t pg) {
>> +  return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_z:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z (svbool_t pg) {
>> +  return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_z_vect:
>> +**	mov	z[0-9]+\.d, p[0-7]/z, #20
>> +**	ret
>> +*/
>> +svint64_t s64_z_vect (svbool_t pg) {
>> +  return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_ptrue:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue () {
>> +  return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_ptrue_vect:
>> +**	mov	z[0-9]+\.d, #20
>> +**	ret
>> +*/
>> +svint64_t s64_m_ptrue_vect () {
>> +  return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_m_pg:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg (svbool_t pg) {
>> +  return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
>> +}
>> +
>> +/*
>> +** s64_m_pg_vect:
>> +**	mov	z[0-9]+\.d, #5
>> +**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
>> +**	ret
>> +*/
>> +svint64_t s64_m_pg_vect (svbool_t pg) {
>> +  return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
>> +}
>> +
>> +/*
>> +** s64_x_0:
>> +**	mov	z[0-9]+\.d, #5
>> +**	ret
>> +*/
>> +svint64_t s64_x_0 (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
>> +}
>> +
>> +/*
>> +** s64_x_bit_width:
>> +**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
>> +**	ret
>> +*/
>> +svint64_t s64_x_bit_width (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
>> +}
>> +
>> +/*
>> +** s64_x_out_of_range:
>> +**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
>> +**	ret
>> +*/
>> +svint64_t s64_x_out_of_range (svbool_t pg) {
>> +  return svlsl_n_s64_x (pg, svdup_s64 (5), 68);
>> +}
>> +
>> +/*
>> +** u8_x_unsigned_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
>> +  return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_signed_overflow:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_signed_overflow (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
>> +}
>> +
>> +/*
>> +** s8_x_neg_shift:
>> +**	mov	z[0-9]+\.b, #-2
>> +**	ret
>> +*/
>> +svint8_t s8_x_neg_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
>> +}
>> +
>> +/*
>> +** s8_x_max_shift:
>> +**	mov	z[0-9]+\.b, #-128
>> +**	ret
>> +*/
>> +svint8_t s8_x_max_shift (svbool_t pg) {
>> +  return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
>> +}
>> +
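As a usage-level illustration of what the fold buys (hypothetical function
name; the expected assembly is inferred by analogy with the s64_x test above,
which folds 5 << 2 to a single move of #20):

    #include <arm_sve.h>

    /* 5 << 4 folds to the constant 80 at compile time, so at -O2 this
       should become a single move of #80 into a Z register rather than
       a dup-and-shift sequence.  */
    svint64_t shift_const (svbool_t pg) {
      return svlsl_n_s64_x (pg, svdup_s64 (5), 4);
    }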
0001-SVE-intrinsics-Fold-constant-operands-for-svlsl.patch