Hello,

The operands in the RTL patterns of the MVE vector scatter store intrinsics are wrongly grouped, because of which a few vector load and store instructions are wrongly optimized out at -O2.
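For example, in a sequence like the one below (a minimal sketch modelled on the new tests added by this patch; the function and variable names are purely illustrative), some of the vector loads and stores surrounding the scatter store could be wrongly optimized out at -O2:

#include "arm_mve.h"

/* Illustrative only: mirrors the shape of the new tests below.  */
int
foo_ws32 (uint32_t *pDataSrc, uint32_t *pDataDest)
{
  const uint32x4_t vecOffs = { 0, 3, 6, 1 };
  uint32x4_t vecIn = vldrwq_u32 (pDataSrc);              /* vector load */
  vstrwq_scatter_offset_u32 (pDataDest, vecOffs, vecIn); /* scatter store */
  pDataDest[8] = pDataSrc[8];
  return 0;
}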
This patch defines a new predicate, "mve_scatter_memory", which returns TRUE only when it matches a (mem (reg)) operand of an MVE scatter store intrinsic. The issue is fixed by turning each affected define_insn into a define_expand whose memory operand uses the "mve_scatter_memory" predicate; the expander extracts the base register from that memory operand and passes it as the first argument (a register_operand) to a new corresponding define_insn.

Please refer to the M-profile Vector Extension (MVE) intrinsics documentation [1] for more details.

[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics

Regression tested on arm-none-eabi and found no regressions.

Ok for trunk?

Thanks,
Srinath.

gcc/ChangeLog:

2020-06-02  Srinath Parvathaneni  <srinath.parvathan...@arm.com>

	PR target/94735
	* config/arm/predicates.md (mve_scatter_memory): Define to
	match (mem (reg)) for scatter store memory.
	* config/arm/mve.md (mve_vstrbq_scatter_offset_<supf><mode>): Modify
	define_insn to define_expand.
	(mve_vstrbq_scatter_offset_p_<supf><mode>): Likewise.
	(mve_vstrhq_scatter_offset_<supf><mode>): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_<supf><mode>): Likewise.
	(mve_vstrhq_scatter_shifted_offset_<supf><mode>): Likewise.
	(mve_vstrdq_scatter_offset_p_<supf>v2di): Likewise.
	(mve_vstrdq_scatter_offset_<supf>v2di): Likewise.
	(mve_vstrdq_scatter_shifted_offset_p_<supf>v2di): Likewise.
	(mve_vstrdq_scatter_shifted_offset_<supf>v2di): Likewise.
	(mve_vstrhq_scatter_offset_fv8hf): Likewise.
	(mve_vstrhq_scatter_offset_p_fv8hf): Likewise.
	(mve_vstrhq_scatter_shifted_offset_fv8hf): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_fv8hf): Likewise.
	(mve_vstrwq_scatter_offset_fv4sf): Likewise.
	(mve_vstrwq_scatter_offset_p_fv4sf): Likewise.
	(mve_vstrwq_scatter_offset_p_<supf>v4si): Likewise.
	(mve_vstrwq_scatter_offset_<supf>v4si): Likewise.
	(mve_vstrwq_scatter_shifted_offset_fv4sf): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_fv4sf): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_<supf>v4si): Likewise.
	(mve_vstrwq_scatter_shifted_offset_<supf>v4si): Likewise.
	(mve_vstrbq_scatter_offset_<supf><mode>_insn): Define insn for
	scatter stores.
	(mve_vstrbq_scatter_offset_p_<supf><mode>_insn): Likewise.
	(mve_vstrhq_scatter_offset_<supf><mode>_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn): Likewise.
	(mve_vstrdq_scatter_offset_p_<supf>v2di_insn): Likewise.
	(mve_vstrdq_scatter_offset_<supf>v2di_insn): Likewise.
	(mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn): Likewise.
	(mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn): Likewise.
	(mve_vstrhq_scatter_offset_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_offset_p_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn): Likewise.
	(mve_vstrwq_scatter_offset_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_offset_p_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_offset_p_<supf>v4si_insn): Likewise.
	(mve_vstrwq_scatter_offset_<supf>v4si_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn): Likewise.

gcc/testsuite/ChangeLog:

2020-06-02  Srinath Parvathaneni  <srinath.parvathan...@arm.com>

	PR target/94735
	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c: New test.
* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c: Likewise. ############### Attachment also inlined for ease of reply ############### diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 986fbfe2abae5f1e91e65f1ff5c84709c43c4617..3a57901bd5bcd770832d59dc77cd92b6d9b5ecb4 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -8102,22 +8102,29 @@ ;; ;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u] ;; -(define_insn "mve_vstrbq_scatter_offset_<supf><mode>" - [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") - (unspec:<MVE_B_ELEM> - [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VSTRBSOQ)) - ] +(define_expand "mve_vstrbq_scatter_offset_<supf><mode>" + [(match_operand:<MVE_B_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn("vstrb.<V_sz_elem>\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrbq_scatter_offset_<supf><mode>_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrbq_scatter_offset_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w")] + VSTRBSOQ))] + "TARGET_HAVE_MVE" + "vstrb.<V_sz_elem>\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; @@ -8210,23 +8217,33 @@ ;; ;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u] ;; -(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>" - [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") - (unspec:<MVE_B_ELEM> - [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRBSOQ)) - ] +(define_expand "mve_vstrbq_scatter_offset_p_<supf><mode>" + [(match_operand:<MVE_B_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand" "Up") + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrbq_scatter_offset_p_<supf><mode>_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRBSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrbt.<V_sz_elem>\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; @@ -9097,87 +9114,122 @@ ;; ;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u] ;; 
-(define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>" - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") - (unspec:<MVE_H_ELEM> - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHSOQ)) - ] +(define_expand "mve_vstrhq_scatter_offset_p_<supf><mode>" + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.<V_sz_elem>\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_offset_p_<supf><mode>_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_offset_s vstrhq_scatter_offset_u] ;; -(define_insn "mve_vstrhq_scatter_offset_<supf><mode>" - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") - (unspec:<MVE_H_ELEM> - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w")] - VSTRHSOQ)) - ] +(define_expand "mve_vstrhq_scatter_offset_<supf><mode>" + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.<V_sz_elem>\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset_<supf><mode>_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w")] + VSTRHSOQ))] + "TARGET_HAVE_MVE" + "vstrh.<V_sz_elem>\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>" - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Ux") - (unspec:<MVE_H_ELEM> - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHSSOQ)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>" + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.<V_sz_elem>\t%q2, 
[%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>" - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") - (unspec:<MVE_H_ELEM> - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w")] - VSTRHSSOQ)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_<supf><mode>" + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.<V_sz_elem>\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w")] + VSTRHSSOQ))] + "TARGET_HAVE_MVE" + "vstrh.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "4")]) ;; @@ -9345,173 +9397,240 @@ ;; ;; [vstrdq_scatter_offset_p_s vstrdq_scatter_offset_p_u] ;; -(define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRDSOQ)) - ] +(define_expand "mve_vstrdq_scatter_offset_p_<supf>v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrdq_scatter_offset_p_<supf>v2di_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRDSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrdt.64\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrdq_scatter_offset_s vstrdq_scatter_offset_u] ;; -(define_insn 
"mve_vstrdq_scatter_offset_<supf>v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w")] - VSTRDSOQ)) - ] +(define_expand "mve_vstrdq_scatter_offset_<supf>v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrdq_scatter_offset_<supf>v2di_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_offset_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w")] + VSTRDSOQ))] + "TARGET_HAVE_MVE" + "vstrd.64\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRDSSOQ)) - ] +(define_expand "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1, UXTW #3]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRDSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrdt.64\t%q2, [%0, %q1, UXTW #3]" [(set_attr "length" "8")]) ;; ;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w")] - VSTRDSSOQ)) - ] +(define_expand "mve_vstrdq_scatter_shifted_offset_<supf>v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1, UXTW #3]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn (ind, operands[1], + operands[2])); 
+ DONE; +}) + +(define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w")] + VSTRDSSOQ))] + "TARGET_HAVE_MVE" + "vstrd.64\t%q2, [%0, %q1, UXTW #3]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_offset_f] ;; -(define_insn "mve_vstrhq_scatter_offset_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")] - VSTRHQSO_F)) - ] +(define_expand "mve_vstrhq_scatter_offset_fv8hf" + [(match_operand:V8HI 0 "mve_scatter_memory") + (match_operand:V8HI 1 "s_register_operand") + (match_operand:V8HF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset_fv8hf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w")] + VSTRHQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrh.16\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_offset_p_f] ;; -(define_insn "mve_vstrhq_scatter_offset_p_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHQSO_F)) - ] +(define_expand "mve_vstrhq_scatter_offset_p_fv8hf" + [(match_operand:V8HI 0 "mve_scatter_memory") + (match_operand:V8HI 1 "s_register_operand") + (match_operand:V8HF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset_p_fv8hf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_p_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrht.16\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_shifted_offset_f] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")] - VSTRHQSSO_F)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_fv8hf" + [(match_operand:V8HI 0 "memory_operand" "=Us") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] 
VSTRHQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_shifted_offset_fv8hf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w")] + VSTRHQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrh.16\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_shifted_offset_p_f] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHQSSO_F)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_p_fv8hf" + [(match_operand:V8HI 0 "memory_operand" "=Us") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up") + (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "8")]) ;; @@ -9562,173 +9681,240 @@ ;; ;; [vstrwq_scatter_offset_f] ;; -(define_insn "mve_vstrwq_scatter_offset_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w")] - VSTRWQSO_F)) - ] +(define_expand "mve_vstrwq_scatter_offset_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_fv4sf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w")] + VSTRWQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrw.32\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; 
[vstrwq_scatter_offset_p_f] ;; -(define_insn "mve_vstrwq_scatter_offset_p_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWQSO_F)) - ] +(define_expand "mve_vstrwq_scatter_offset_p_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_p_fv4sf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrwt.32\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; -;; [vstrwq_scatter_offset_p_s vstrwq_scatter_offset_p_u] +;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] ;; -(define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWSOQ)) - ] +(define_expand "mve_vstrwq_scatter_offset_p_<supf>v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_p_<supf>v4si_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrwt.32\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] ;; -(define_insn "mve_vstrwq_scatter_offset_<supf>v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] - VSTRWSOQ)) - ] +(define_expand "mve_vstrwq_scatter_offset_<supf>v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn 
("vstrw.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_<supf>v4si_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VSTRWSOQ))] + "TARGET_HAVE_MVE" + "vstrw.32\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrwq_scatter_shifted_offset_f] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w")] - VSTRWQSSO_F)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} - [(set_attr "length" "4")]) + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_shifted_offset_fv4sf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w")] + VSTRWQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" + [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_p_f] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWQSSO_F)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_p_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si" - [(set (match_operand:V4SI 0 
"memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWSSOQ)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] - VSTRWSSOQ)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_<supf>v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VSTRWSSOQ))] + "TARGET_HAVE_MVE" + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "4")]) ;; diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index c57ad73577e1eebebc8951ed5b4fb544dd3381f8..9e9bca4d87fdc31e045b2b5bb03b996f082079bd 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -37,6 +37,12 @@ && mve_vector_mem_operand (GET_MODE (op), XEXP (op, 0), false)"))) +(define_predicate "mve_scatter_memory" + (and (match_code "mem") + (match_test "TARGET_HAVE_MVE && REG_P (XEXP (op, 0)) + && mve_vector_mem_operand (GET_MODE (op), XEXP (op, 0), + false)"))) + ;; True for immediates in the range of 1 to 16 for MVE. 
(define_predicate "mve_imm_16" (match_test "satisfies_constraint_Rd (op)")) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c new file mode 100644 index 0000000000000000000000000000000000000000..21b9e12d57e064688e6d52493deffc1c2c39761d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c @@ -0,0 +1,67 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_s32 (pDataDest, 4, value); + vstrwq_scatter_base_s32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_u32 (pDataDest, 4, value); + vstrwq_scatter_base_u32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_f32 (pDataDest, 4, value); + vstrwq_scatter_base_f32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_s64 (pDataDest, 256, value); + vstrdq_scatter_base_s64 (pDataDest, 512, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_u64 (pDataDest, 256, value); + vstrdq_scatter_base_u64 (pDataDest, 512, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c new file mode 100644 index 0000000000000000000000000000000000000000..15c6496732a31259ebcceebeb8ac65e071a04b20 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c @@ -0,0 +1,69 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +mve_pred16_t __p; + +int +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_s32 (pDataDest, 4, value, __p); + 
vstrwq_scatter_base_p_s32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_u32 (pDataDest, 4, value, __p); + vstrwq_scatter_base_p_u32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_f32 (pDataDest, 4, value, __p); + vstrwq_scatter_base_p_f32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_p_s64 (pDataDest, 256, value, __p); + vstrdq_scatter_base_p_s64 (pDataDest, 512, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_p_u64 (pDataDest, 256, value, __p); + vstrdq_scatter_base_p_u64 (pDataDest, 512, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c new file mode 100644 index 0000000000000000000000000000000000000000..6d123669c13f168e651b7aa3344c4324fd4afe50 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c @@ -0,0 +1,215 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_u8 (pDataDest, vecOffs1, (uint8x16_t) vecIn1); + vstrbq_scatter_offset_u8 (pDataDest, vecOffs2, (uint8x16_t) vecIn2); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) 
vecIn1); + vstrbq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrbq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foobs8( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1); + vstrbq_scatter_offset_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobs16( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); + vstrbq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobs32( uint8_t * pDataSrc, int8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrbq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1); + vstrhq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohu32( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrhq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohs16( int16_t * pDataSrc, int16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); + vstrhq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); + 
pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohs32( uint16_t * pDataSrc, int16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrhq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohf16( float16_t * pDataSrc, float16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1); + vstrhq_scatter_offset_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrwq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foows32( int32_t * pDataSrc, int32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrwq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foowf32( float32_t * pDataSrc, float32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrwq_scatter_offset_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1); + vstrwq_scatter_offset_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foowu64( uint64_t * pDataSrc, uint64_t * pDataDest) +{ + const uint64x2_t vecOffs1 = { 0, 3}; + const uint64x2_t vecOffs2 = { 1, 2}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); + vstrdq_scatter_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1); + vstrdq_scatter_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2); + pDataDest[4] = pDataSrc[4]; + return 0; +} + +int +foows64( int64_t * pDataSrc, int64_t * pDataDest) +{ + const uint64x2_t vecOffs1 = { 0, 3}; + const uint64x2_t vecOffs2 = { 1, 2}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); + vstrdq_scatter_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1); + vstrdq_scatter_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2); + pDataDest[4] = pDataSrc[4]; + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 32 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c new file mode 100644 index 0000000000000000000000000000000000000000..cd2e1ee80f9dfe35955468a822bd202679039831 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c @@ -0,0 +1,216 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +mve_pred16_t __p; +int +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_p_u8(pDataDest, vecOffs1, (uint8x16_t) vecIn1, __p); + vstrbq_scatter_offset_p_u8(pDataDest, vecOffs2, (uint8x16_t) vecIn2, __p); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p); + vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p); + vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foobs8( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1, __p); + vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2, __p); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobs16( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p); + vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobs32( uint8_t * pDataSrc, int8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p); + vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs2, 
(int32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p); + vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohu32( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p); + vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohs16( int16_t * pDataSrc, int16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p); + vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohs32( uint16_t * pDataSrc, int16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p); + vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohf16( float16_t * pDataSrc, float16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1, __p); + vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p); + vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foows32( int32_t * pDataSrc, int32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p); + vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + 
return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
+  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1, __p);
+  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 3};
+  const uint64x2_t vecOffs2 = { 1, 2};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
+  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
+  pDataDest[4] = pDataSrc[4];
+  return 0;
+}
+
+int
+foows64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 3};
+  const uint64x2_t vecOffs2 = { 1, 2};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
+  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
+  pDataDest[4] = pDataSrc[4];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 32 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
new file mode 100644
index 0000000000000000000000000000000000000000..62dfb450a6d30312472f5c8bb2d41e98fe6b6a32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
@@ -0,0 +1,141 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int
+foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  float32x4_t vecIn1 = vldrwq_f32 ((float32_t const *) pDataSrc);
+  float32x4_t vecIn2 = vldrwq_f32 ((float32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  uint16x8_t vecIn1 = vldrhq_u16 ((uint16_t const *) pDataSrc);
+  uint16x8_t vecIn2 = vldrhq_u16 ((uint16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrhq_u32 ((uint16_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrhq_u32 ((uint16_t const *) &pDataSrc[4]);
+  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohf16( float16_t * pDataSrc, float16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  float16x8_t vecIn1 = vldrhq_f16 ((float16_t const *) pDataSrc);
+  float16x8_t vecIn2 = vldrhq_f16 ((float16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+
+  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1);
+  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+int
+foows32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohs16( int16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  int16x8_t vecIn1 = vldrhq_s16 ((int16_t const *) pDataSrc);
+  int16x8_t vecIn2 = vldrhq_s16 ((int16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohs32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrhq_s32 ((int16_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrhq_s32 ((int16_t const *) &pDataSrc[4]);
+  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foods64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[2]);
+
+  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1);
+  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
new file mode 100644
index 0000000000000000000000000000000000000000..a51d3a211672e74e99f571ef362445d13f2e2368
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
@@ -0,0 +1,142 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+mve_pred16_t __p;
+int
+foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  float32x4_t vecIn1 = vldrwq_z_f32 ((float32_t const *) pDataSrc, __p);
+  float32x4_t vecIn2 = vldrwq_z_f32 ((float32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  uint16x8_t vecIn1 = vldrhq_z_u16 ((uint16_t const *) pDataSrc, __p);
+  uint16x8_t vecIn2 = vldrhq_z_u16 ((uint16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrhq_z_u32 ((uint16_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrhq_z_u32 ((uint16_t const *) &pDataSrc[4], __p);
+  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohf16( float16_t * pDataSrc, float16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  float16x8_t vecIn1 = vldrhq_z_f16 ((float16_t const *) pDataSrc, __p);
+  float16x8_t vecIn2 = vldrhq_z_f16 ((float16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[2], __p);
+
+  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
+  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+int
+foows32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohs16( int16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  int16x8_t vecIn1 = vldrhq_z_s16 ((int16_t const *) pDataSrc, __p);
+  int16x8_t vecIn2 = vldrhq_z_s16 ((int16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohs32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrhq_z_s32 ((int16_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrhq_z_s32 ((int16_t const *) &pDataSrc[4], __p);
+  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foods64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[2], __p);
+
+  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
+  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */
[Attachment: rb12939.patch.gz (application/gzip)]