<dhr...@nvidia.com> writes:

> @@ -4899,7 +4876,9 @@
>    if (CONST_INT_P (operands[2]))
>      {
>        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
> -      if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
> +      if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)
> +          && !aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> +                                        <optab>_optab == ashl_optab))
>          amount = force_reg (<MODE>mode, amount);
>      }
>    else
I think the problem here is that the old code is testing the wrong
thing.  It should instead be testing "amount", as in:

    amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
    if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
      amount = force_reg (<MODE>mode, amount);

> @@ -4923,6 +4902,13 @@
>             UNSPEC_PRED_X))]
>    "TARGET_SVE"
>    {
> +    if (aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> +                                  <optab>_optab == ashl_optab))

<CODE> == ASHIFT would be more direct than <optab>_optab == ashl_optab.

Thanks,
Richard

> +      {
> +        emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
> +                                                      operands[2]));
> +        DONE;
> +      }
>      operands[3] = aarch64_ptrue_reg (<VPRED>mode);
>    }
> )

> @@ -4952,27 +4938,27 @@
>    ""
> )
>
> -;; Unpredicated shift operations by a constant (post-RA only).
> +;; Unpredicated shift operations by a constant.
> ;; These are generated by splitting a predicated instruction whose
> ;; predicate is unused.
> -(define_insn "*post_ra_v_ashl<mode>3"
> +(define_insn "aarch64_vashl<mode>3_const"
>    [(set (match_operand:SVE_I 0 "register_operand")
>          (ashift:SVE_I
>            (match_operand:SVE_I 1 "register_operand")
>            (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
> -  "TARGET_SVE && reload_completed"
> +  "TARGET_SVE"
>    {@ [ cons: =0 , 1 , 2   ]
>       [ w        , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
>       [ w        , w , Dl  ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
>    }
> )
>
> -(define_insn "*post_ra_v_<optab><mode>3"
> +(define_insn "aarch64_v<optab><mode>3_const"
>    [(set (match_operand:SVE_I 0 "register_operand" "=w")
>          (SHIFTRT:SVE_I
>            (match_operand:SVE_I 1 "register_operand" "w")
>            (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
> -  "TARGET_SVE && reload_completed"
> +  "TARGET_SVE"
>    "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
> )
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index 871cf0bd2e8..62524f36de6 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@
-1932,40 +1932,27 @@
> (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>          (plus:SVE_FULL_I
> -          (unspec:SVE_FULL_I
> -            [(match_dup 4)
> -             (SHIFTRT:SVE_FULL_I
> -               (match_operand:SVE_FULL_I 2 "register_operand")
> -               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> -            UNSPEC_PRED_X)
> -          (match_operand:SVE_FULL_I 1 "register_operand")))]
> +          (SHIFTRT:SVE_FULL_I
> +            (match_operand:SVE_FULL_I 2 "register_operand")
> +            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
> +          (match_operand:SVE_FULL_I 1 "register_operand")))]
>    "TARGET_SVE2"
> -  {
> -    operands[4] = CONSTM1_RTX (<VPRED>mode);
> -  }
> )
>
> ;; Pattern-match SSRA and USRA as a predicated operation whose predicate
> ;; isn't needed.
> -(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
> +(define_insn "*aarch64_sve2_sra<mode>"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>          (plus:SVE_FULL_I
> -          (unspec:SVE_FULL_I
> -            [(match_operand 4)
> -             (SHIFTRT:SVE_FULL_I
> -               (match_operand:SVE_FULL_I 2 "register_operand")
> -               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> -            UNSPEC_PRED_X)
> +          (SHIFTRT:SVE_FULL_I
> +            (match_operand:SVE_FULL_I 2 "register_operand")
> +            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
>            (match_operand:SVE_FULL_I 1 "register_operand")))]
>    "TARGET_SVE2"
>    {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
>       [ w        , 0 , w ; *              ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
>       [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
>    }
> -  "&& !CONSTANT_P (operands[4])"
> -  {
> -    operands[4] = CONSTM1_RTX (<VPRED>mode);
> -  }
> )
>
> ;; SRSRA and URSRA.
> @@ -2715,17 +2702,14 @@
> ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
> (define_insn "*bitmask_shift_plus<mode>"
>    [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
> -        (unspec:SVE_FULL_HSDI
> -          [(match_operand:<VPRED> 1)
> -           (lshiftrt:SVE_FULL_HSDI
> -             (plus:SVE_FULL_HSDI
> -               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
> -               (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
> -             (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_shift_imm_vec_exact_top" ""))]
> -          UNSPEC_PRED_X))]
> +        (lshiftrt:SVE_FULL_HSDI
> +          (plus:SVE_FULL_HSDI
> +            (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
> +            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
> +          (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_shift_imm_vec_exact_top" "")))]
>    "TARGET_SVE2"
> -  "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
> +  "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
> )
>
> ;; -------------------------------------------------------------------------