Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> As subject, this patch splits the aarch64_<sur>q<r>shr<u>n_n<mode>
> pattern into separate scalar and vector variants. It further splits the vector
> pattern into big/little endian variants that model the zero-high-half
> semantics of the underlying instruction - allowing for more combinations
> with the write-to-high-half variant
> (aarch64_<sur>q<r>shr<u>n2_n<mode>.) This improvement will be
> confirmed by a new test in gcc.target/aarch64/narrow_high_combine.c
> (patch 5/5 in this series.)
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-05-14  Jonathan Wright  <jonathan.wri...@arm.com>
>
>         * config/aarch64/aarch64-simd-builtins.def: Split builtin
>         generation for aarch64_<sur>q<r>shr<u>n_n<mode> pattern into
>         separate scalar and vector generators.
>         * config/aarch64/aarch64-simd.md
>         (aarch64_<sur>q<r>shr<u>n_n<mode>): Define as an expander and
>         split into...
>         (aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): This and...
>         (aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): This.
>         * config/aarch64/iterators.md: Define SD_HSDI iterator.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index 1e81bb53287e9797f3539c2c64ed11c6c26d6e4e..18baa6720b09b2ebda8577b809f8a8683f8b44f0 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -421,12 +421,18 @@
>    BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
>    BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
>    /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>.  */
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
> +  BUILTIN_VQN (USHIFTIMM, uqshrn_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqrshrn_n, 0, NONE)
> +  BUILTIN_VQN (USHIFTIMM, uqrshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
>    /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>.  */
>    BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
>    BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 79523093ec327b826c0a6741bf315c6c2f67fe64..c67fa3fb6f0ca0a181a09a42677526d12e955c06 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6045,7 +6045,7 @@
>  
>  (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
>    [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> -        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
> +        (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
>                           (match_operand:SI 2
>                             "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
>                          VQSHRN_N))]
> @@ -6054,6 +6054,58 @@
>    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
>  )
>  
> +(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +     (vec_concat:<VNARROWQ2>
> +       (unspec:<VNARROWQ>
> +             [(match_operand:VQN 1 "register_operand" "w")
> +              (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
> +             VQSHRN_N)
> +       (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
> +  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> +  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +     (vec_concat:<VNARROWQ2>
> +       (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
> +       (unspec:<VNARROWQ>
> +             [(match_operand:VQN 1 "register_operand" "w")
> +              (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
> +             VQSHRN_N)))]
> +  "TARGET_SIMD && BYTES_BIG_ENDIAN"
> +  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> +        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
> +                         (match_operand:SI 2
> +                           "aarch64_simd_shift_imm_offset_<ve_mode>")]
> +                        VQSHRN_N))]
> +  "TARGET_SIMD"
> +  {
> +    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
> +                                              INTVAL (operands[2]));
> +    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
> +    if (BYTES_BIG_ENDIAN)
> +      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
> +                 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
> +    else
> +      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
> +                 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
> +
> +    /* The intrinsic expects a narrow result, so emit a subreg that will get
> +       optimized away as appropriate.  */
> +    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
> +                                              <VNARROWQ2>mode));
> +    DONE;
> +  }
> +)
> +
>  (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
>    [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
>       (vec_concat:<VNARROWQ2>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 96eaef9c749927394465bfe445f509807bfdc57c..e398aa7a65b8644c8eb376fc78cc8f4e45424511 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -282,6 +282,9 @@
>  ;; Scalar 64-bit container: 16, 32-bit integer modes
>  (define_mode_iterator SD_HSI [HI SI])
>  
> +;; Scalar 64-bit container: 16-bit, 32-bit and 64-bit integer modes.
> +(define_mode_iterator SD_HSDI [HI SI DI])
> +
>  ;; Advanced SIMD 64-bit container: 16, 32-bit integer modes.
>  (define_mode_iterator VQ_HSI [V8HI V4SI])
>  

Reply via email to