<dhr...@nvidia.com> writes:

> @@ -4899,7 +4876,9 @@
>    if (CONST_INT_P (operands[2]))
>      {
>        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
> -      if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
> +      if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)
> +          && !aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> +                                        <optab>_optab == ashl_optab))
>          amount = force_reg (<MODE>mode, amount);
>      }
>    else
I think the problem here is that the old code is testing the wrong
thing.  It should instead be testing "amount", as in:

    amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
    if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
      amount = force_reg (<MODE>mode, amount);

> @@ -4923,6 +4902,13 @@
>             UNSPEC_PRED_X))]
>    "TARGET_SVE"
>    {
> +    if (aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> +                                  <optab>_optab == ashl_optab))

<CODE> == ASHIFT would be more direct than <optab>_optab == ashl_optab.

Thanks,
Richard

> +      {
> +        emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
> +                                                      operands[2]));
> +        DONE;
> +      }
>      operands[3] = aarch64_ptrue_reg (<VPRED>mode);
>    }
> )

> @@ -4952,27 +4938,27 @@
>    ""
> )
>
> -;; Unpredicated shift operations by a constant (post-RA only).
> +;; Unpredicated shift operations by a constant.
> ;; These are generated by splitting a predicated instruction whose
> ;; predicate is unused.
> -(define_insn "*post_ra_v_ashl<mode>3"
> +(define_insn "aarch64_vashl<mode>3_const"
>    [(set (match_operand:SVE_I 0 "register_operand")
>          (ashift:SVE_I
>            (match_operand:SVE_I 1 "register_operand")
>            (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
> -  "TARGET_SVE && reload_completed"
> +  "TARGET_SVE"
>    {@ [ cons: =0 , 1 , 2   ]
>       [ w        , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
>       [ w        , w , Dl  ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
>    }
> )
>
> -(define_insn "*post_ra_v_<optab><mode>3"
> +(define_insn "aarch64_v<optab><mode>3_const"
>    [(set (match_operand:SVE_I 0 "register_operand" "=w")
>          (SHIFTRT:SVE_I
>            (match_operand:SVE_I 1 "register_operand" "w")
>            (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
> -  "TARGET_SVE && reload_completed"
> +  "TARGET_SVE"
>    "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
> )
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index 871cf0bd2e8..62524f36de6 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@
-1932,40 +1932,27 @@
> (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>          (plus:SVE_FULL_I
> -          (unspec:SVE_FULL_I
> -            [(match_dup 4)
> -             (SHIFTRT:SVE_FULL_I
> -               (match_operand:SVE_FULL_I 2 "register_operand")
> -               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> -            UNSPEC_PRED_X)
> -          (match_operand:SVE_FULL_I 1 "register_operand")))]
> +          (SHIFTRT:SVE_FULL_I
> +            (match_operand:SVE_FULL_I 2 "register_operand")
> +            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
> +          (match_operand:SVE_FULL_I 1 "register_operand")))]
>    "TARGET_SVE2"
> -  {
> -    operands[4] = CONSTM1_RTX (<VPRED>mode);
> -  }
> )
>
> ;; Pattern-match SSRA and USRA as a predicated operation whose predicate
> ;; isn't needed.
> -(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
> +(define_insn "*aarch64_sve2_sra<mode>"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>          (plus:SVE_FULL_I
> -          (unspec:SVE_FULL_I
> -            [(match_operand 4)
> -             (SHIFTRT:SVE_FULL_I
> -               (match_operand:SVE_FULL_I 2 "register_operand")
> -               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> -            UNSPEC_PRED_X)
> +          (SHIFTRT:SVE_FULL_I
> +            (match_operand:SVE_FULL_I 2 "register_operand")
> +            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
>            (match_operand:SVE_FULL_I 1 "register_operand")))]
>    "TARGET_SVE2"
>    {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
>       [ w        , 0 , w ; *              ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
>       [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
>    }
> -  "&& !CONSTANT_P (operands[4])"
> -  {
> -    operands[4] = CONSTM1_RTX (<VPRED>mode);
> -  }
> )
>
> ;; SRSRA and URSRA.
> @@ -2715,17 +2702,14 @@
> ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
> (define_insn "*bitmask_shift_plus<mode>"
>    [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
> -        (unspec:SVE_FULL_HSDI
> -          [(match_operand:<VPRED> 1)
> -           (lshiftrt:SVE_FULL_HSDI
> -             (plus:SVE_FULL_HSDI
> -               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
> -               (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
> -             (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_shift_imm_vec_exact_top" ""))]
> -          UNSPEC_PRED_X))]
> +        (lshiftrt:SVE_FULL_HSDI
> +          (plus:SVE_FULL_HSDI
> +            (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
> +            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
> +          (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_shift_imm_vec_exact_top" "")))]
>    "TARGET_SVE2"
> -  "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
> +  "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
> )
>
> ;; -------------------------------------------------------------------------