Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> As subject, this patch splits the aarch64_<sur>q<r>shr<u>n_n<mode>
> pattern into separate scalar and vector variants.  It further splits
> the vector pattern into big/little-endian variants that model the
> zero-high-half semantics of the underlying instruction, allowing for
> more combinations with the write-to-high-half variant
> (aarch64_<sur>q<r>shr<u>n2_n<mode>).  This improvement will be
> confirmed by a new test in gcc.target/aarch64/narrow_high_combine.c
> (patch 5/5 in this series).
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
OK, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-05-14  Jonathan Wright  <jonathan.wri...@arm.com>
>
> 	* config/aarch64/aarch64-simd-builtins.def: Split builtin
> 	generation for aarch64_<sur>q<r>shr<u>n_n<mode> pattern into
> 	separate scalar and vector generators.
> 	* config/aarch64/aarch64-simd.md
> 	(aarch64_<sur>q<r>shr<u>n_n<mode>): Define as an expander and
> 	split into...
> 	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): This and...
> 	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): This.
> 	* config/aarch64/iterators.md: Define SD_HSDI iterator.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index 1e81bb53287e9797f3539c2c64ed11c6c26d6e4e..18baa6720b09b2ebda8577b809f8a8683f8b44f0 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -421,12 +421,18 @@
>    BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
>    BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
>    /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>.  */
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
> -  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
> +  BUILTIN_VQN (USHIFTIMM, uqshrn_n, 0, NONE)
> +  BUILTIN_VQN (SHIFTIMM, sqrshrn_n, 0, NONE)
> +  BUILTIN_VQN (USHIFTIMM, uqrshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
> +  BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
>    /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>.  */
>    BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
>    BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 79523093ec327b826c0a6741bf315c6c2f67fe64..c67fa3fb6f0ca0a181a09a42677526d12e955c06 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6045,7 +6045,7 @@
>
>  (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
>    [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> -	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
> +	(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
> 			    (match_operand:SI 2
> 			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
> 			   VQSHRN_N))]
> @@ -6054,6 +6054,58 @@
>    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
>  )
>
> +(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +	(vec_concat:<VNARROWQ2>
> +	  (unspec:<VNARROWQ>
> +	    [(match_operand:VQN 1 "register_operand" "w")
> +	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
> +	    VQSHRN_N)
> +	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
> +  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> +  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +	(vec_concat:<VNARROWQ2>
> +	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
> +	  (unspec:<VNARROWQ>
> +	    [(match_operand:VQN 1 "register_operand" "w")
> +	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
> +	    VQSHRN_N)))]
> +  "TARGET_SIMD && BYTES_BIG_ENDIAN"
> +  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> +	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
> +			    (match_operand:SI 2
> +			      "aarch64_simd_shift_imm_offset_<ve_mode>")]
> +			   VQSHRN_N))]
> +  "TARGET_SIMD"
> +  {
> +    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
> +						     INTVAL (operands[2]));
> +    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
> +    if (BYTES_BIG_ENDIAN)
> +      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
> +		   operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
> +    else
> +      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
> +		   operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
> +
> +    /* The intrinsic expects a narrow result, so emit a subreg that will get
> +       optimized away as appropriate.  */
> +    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
> +						 <VNARROWQ2>mode));
> +    DONE;
> +  }
> +)
> +
>  (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
>    [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
>      (vec_concat:<VNARROWQ2>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 96eaef9c749927394465bfe445f509807bfdc57c..e398aa7a65b8644c8eb376fc78cc8f4e45424511 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -282,6 +282,9 @@
>  ;; Scalar 64-bit container: 16, 32-bit integer modes
>  (define_mode_iterator SD_HSI [HI SI])
>
> +;; Scalar 64-bit container: 16-bit, 32-bit and 64-bit integer modes.
> +(define_mode_iterator SD_HSDI [HI SI DI])
> +
>  ;; Advanced SIMD 64-bit container: 16, 32-bit integer modes.
>  (define_mode_iterator VQ_HSI [V8HI V4SI])
>
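
For readers following the series, the combination that the zero-high-half
modelling unlocks can be sketched with a short intrinsics example.  This is
an illustrative sketch only (the function name and shift amount are
arbitrary), not the actual test from narrow_high_combine.c in patch 5/5:

  #include <arm_neon.h>

  /* Illustrative sketch: with the low-half narrow now modelled as a
     vec_concat with zero, combine can merge the second narrowing shift
     into the write-to-high-half pattern, so the series aims for a
     SQSHRN/SQSHRN2 pair instead of two SQSHRNs plus a separate vector
     combine.  */
  int16x8_t
  narrow_pair (int32x4_t a, int32x4_t b)
  {
    return vcombine_s16 (vqshrn_n_s32 (a, 4), vqshrn_n_s32 (b, 4));
  }

On little-endian this is expected to assemble to roughly
"sqshrn v0.4h, v0.4s, #4" followed by "sqshrn2 v0.8h, v1.4s, #4".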
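The scalar intrinsics that previously shared the VSQN_HSDI pattern keep the
retained define_insn, now restricted to the SD_HSDI scalar modes.  A minimal
sketch of one such intrinsic (again, the name and operands are arbitrary):

  #include <arm_neon.h>

  /* Scalar saturating narrowing shift: handled by the retained
     define_insn via the new BUILTIN_SD_HSDI generators, with no
     high-half semantics to model.  */
  int32_t
  narrow_scalar (int64_t a)
  {
    return vqshrns_n_s64 (a, 4);   /* expected: sqshrn s0, d0, #4 */
  }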