> On 12 Nov 2024, at 4:27 PM, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Soumya AR <soum...@nvidia.com> writes:
>> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
>> index 06bd3e4bb2c..119a0e53853 100644
>> --- a/gcc/config/aarch64/aarch64-sve.md
>> +++ b/gcc/config/aarch64/aarch64-sve.md
>> @@ -5088,6 +5088,21 @@
>>  ;; - FTSSEL
>>  ;; -------------------------------------------------------------------------
>>
>> +(define_expand "ldexp<mode>3"
>> +  [(set (match_operand:GPF_HF 0 "register_operand")
>> +	(unspec:GPF_HF
>> +	  [(match_dup 3)
>> +	   (const_int SVE_RELAXED_GP)
>
> Sorry for only noticing now, but: this should be SVE_STRICT_GP instead of
> SVE_RELAXED_GP, since we don't want to allow other lanes to be made
> active later.
>
>> +	   (match_operand:GPF_HF 1 "register_operand")
>> +	   (match_operand:<V_INT_EQUIV> 2 "register_operand")]
>> +	  UNSPEC_COND_FSCALE))]
>> +  "TARGET_SVE"
>> +  {
>> +    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
>> +				     GET_MODE_UNIT_SIZE (<MODE>mode));
>> +  }
>> +)
>> +
>>  ;; Unpredicated floating-point binary operations that take an integer as
>>  ;; their second operand.
>>  (define_insn "@aarch64_sve_<optab><mode>"
>> @@ -5103,17 +5118,17 @@
>>  ;; Predicated floating-point binary operations that take an integer
>>  ;; as their second operand.
>>  (define_insn "@aarch64_pred_<optab><mode>"
>> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
>> -	(unspec:SVE_FULL_F
>> +  [(set (match_operand:SVE_FULL_F_SCALAR 0 "register_operand")
>> +	(unspec:SVE_FULL_F_SCALAR
>>  	  [(match_operand:<VPRED> 1 "register_operand")
>>  	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
>> -	   (match_operand:SVE_FULL_F 2 "register_operand")
>> +	   (match_operand:SVE_FULL_F_SCALAR 2 "register_operand")
>>  	   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
>>  	  SVE_COND_FP_BINARY_INT))]
>>    "TARGET_SVE"
>>    {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
>> -     [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
>> -     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
>> +     [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
>> +     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
>>    }
>>  )
>>
>> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
>> index 8269b0cdcd9..4153c72954e 100644
>> --- a/gcc/config/aarch64/iterators.md
>> +++ b/gcc/config/aarch64/iterators.md
>> @@ -452,6 +452,9 @@
>>  ;; All fully-packed SVE floating-point vector modes.
>>  (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
>>
>> +;; Fully-packed SVE floating-point vector modes and their scalar equivalents.
>> +(define_mode_iterator SVE_FULL_F_SCALAR [SVE_FULL_F GPF_HF])
>> +
>>  ;; Fully-packed SVE integer vector modes that have 8-bit or 16-bit elements.
>>  (define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI])
>>
>> @@ -2302,7 +2305,8 @@
>>  			      (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
>>  			      (V8QI "VNx8BI") (V16QI "VNx16BI")
>>  			      (V4HI "VNx4BI") (V8HI "VNx8BI") (V2SI "VNx2BI")
>> -			      (V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")])
>> +			      (V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")
>> +			      (HF "VNx8BI") (SF "VNx4BI") (DF "VNx2BI")])
>>
>>  ;; ...and again in lower case.
>>  (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
>> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
>> index c3d0efc0f2c..09b7844d094 100644
>> --- a/gcc/internal-fn.def
>> +++ b/gcc/internal-fn.def
>> @@ -441,7 +441,7 @@ DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
>>  DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>>
>>  /* FP scales. */
>> -DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
>> +DEF_INTERNAL_FLT_FLOATN_FN (LDEXP, ECF_CONST, ldexp, binary)
>>
>>  /* Ternary math functions. */
>>  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fscale.c b/gcc/testsuite/gcc.target/aarch64/sve/fscale.c
>> new file mode 100644
>> index 00000000000..2c32d410f6b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fscale.c
>> @@ -0,0 +1,46 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-Ofast" } */
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +
>> +/*
>> +** test_ldexpf16:
>> +**	...
>> +**	ptrue	p[0-7]\.b, vl2
>
> It would be more robust to capture the register using:
>
>   **	ptrue	(p[0-7])\.b, vl2
>
>> +**	...
>> +**	fscale	z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h
>
> and then match it here using:
>
>   **	fscale	z[0-9]+\.h, \1/m, z[0-9]+\.h, z[0-9]+\.h
>
> Same for the other tests.
>
> OK with those changes if they work (no need for another review unless
> you'd prefer one).
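For reference, with the capture and backreference applied, the f16 check
becomes the following (a sketch; whitespace may differ in the committed
version, and the float and double tests follow the same pattern with
vl4/vl8 and the .s/.d suffixes):

  /*
  ** test_ldexpf16:
  **	...
  **	ptrue	(p[0-7])\.b, vl2
  **	...
  **	fscale	z[0-9]+\.h, \1/m, z[0-9]+\.h, z[0-9]+\.h
  **	ret
  */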
Made the changes and committed: 9b2915d95d855333d4d8f66b71a75f653ee0d076

Thanks a lot!

Best,
Soumya

>
> Thanks,
> Richard
>
>> +**	ret
>> +*/
>> +_Float16
>> +test_ldexpf16 (_Float16 x, int i)
>> +{
>> +  return __builtin_ldexpf16 (x, i);
>> +}
>> +
>> +/*
>> +** test_ldexpf:
>> +**	...
>> +**	ptrue	p[0-7]\.b, vl4
>> +**	...
>> +**	fscale	z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s
>> +**	ret
>> +*/
>> +float
>> +test_ldexpf (float x, int i)
>> +{
>> +  return __builtin_ldexpf (x, i);
>> +}
>> +
>> +/*
>> +** test_ldexp:
>> +**	...
>> +**	ptrue	p[0-7]\.b, vl8
>> +**	...
>> +**	fscale	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d
>> +**	ret
>> +*/
>> +double
>> +test_ldexp (double x, int i)
>> +{
>> +  return __builtin_ldexp (x, i);
>> +}
>> +
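For anyone reading along, the code these checks expect looks roughly like
this for the float case (a sketch only: the register numbers are
illustrative, and the leading "..." in each check covers moving the
exponent into a vector register):

  test_ldexpf:
  	mov	z31.s, w0		// illustrative: copy the int exponent from w0 into a vector register
  	ptrue	p7.b, vl4		// vl4 = first four bytes active, i.e. exactly one 32-bit lane
  	fscale	z0.s, p7/m, z0.s, z31.s	// x * 2^i computed in that single active lane
  	ret

The single-lane ptrue is also why the governing predicate has to be strict:
with a relaxed predicate, later passes would be free to make the other
(undefined) lanes active as well.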