Hi Srinath,

> -----Original Message-----
> From: Srinath Parvathaneni <srinath.parvathan...@arm.com>
> Sent: 02 June 2020 15:00
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Subject: [GCC][PATCH][ARM]: Correct the grouping of operands in MVE
> vector scatter store intrinsics (PR94735).
>
> Hello,
>
> The operands in the RTL patterns of the MVE vector scatter store
> intrinsics are wrongly grouped, because of which a few vector load and
> store instructions are wrongly optimized out at -O2.
>
> This patch defines a new predicate "mve_scatter_memory", which returns
> TRUE on matching (mem (reg)) for the MVE scatter store intrinsics.
> The issue is fixed by changing each affected define_insn into a
> define_expand that uses "mve_scatter_memory" and calls a corresponding
> define_insn, passing a register_operand as the first argument.  This
> register operand is extracted from the operand matched by
> "mve_scatter_memory" in the define_expand pattern.
>
> Please refer to the M-profile Vector Extension (MVE) intrinsics [1] for
> more details.
> [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics
>
> Regression tested on arm-none-eabi with no regressions.
>
> Ok for trunk?
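To illustrate the failure mode described above, here is roughly the kind of
code that was affected (a minimal sketch of my own, in the style of the new
tests; the function and parameter names are invented and not part of the
patch).  The only side effect of the intrinsic is the memory write, so if the
RTL pattern does not expose that write, the whole call can be removed at -O2:

/* Sketch only, not part of the patch.  Compile for MVE at -O2; the scatter
   store below must survive dead-code elimination.  */
#include "arm_mve.h"

void
scatter_sketch (int32_t *base, uint32x4_t offsets, int32x4_t values)
{
  /* Scatters the four lanes of "values" to addresses derived from "base"
     and "offsets"; the stores are the only observable effect.  */
  vstrwq_scatter_offset_s32 (base, offsets, values);
}

With the patch this goes through the new define_expand and ends up as a
*_insn pattern whose (mem:BLK (scratch)) destination tells the optimizers
that memory is written, so the store is kept.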
Ok.
Thanks,
Kyrill

>
> Thanks,
> Srinath.
>
> gcc/ChangeLog:
>
> 2020-06-02  Srinath Parvathaneni  <srinath.parvathan...@arm.com>
>
> 	PR target/94735
> 	* config/arm/predicates.md (mve_scatter_memory): Define to
> 	match (mem (reg)) for scatter store memory.
> 	* config/arm/mve.md (mve_vstrbq_scatter_offset_<supf><mode>):
> 	Modify define_insn to define_expand.
> 	(mve_vstrbq_scatter_offset_p_<supf><mode>): Likewise.
> 	(mve_vstrhq_scatter_offset_<supf><mode>): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_p_<supf><mode>): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_<supf><mode>): Likewise.
> 	(mve_vstrdq_scatter_offset_p_<supf>v2di): Likewise.
> 	(mve_vstrdq_scatter_offset_<supf>v2di): Likewise.
> 	(mve_vstrdq_scatter_shifted_offset_p_<supf>v2di): Likewise.
> 	(mve_vstrdq_scatter_shifted_offset_<supf>v2di): Likewise.
> 	(mve_vstrhq_scatter_offset_fv8hf): Likewise.
> 	(mve_vstrhq_scatter_offset_p_fv8hf): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_fv8hf): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_p_fv8hf): Likewise.
> 	(mve_vstrwq_scatter_offset_fv4sf): Likewise.
> 	(mve_vstrwq_scatter_offset_p_fv4sf): Likewise.
> 	(mve_vstrwq_scatter_offset_p_<supf>v4si): Likewise.
> 	(mve_vstrwq_scatter_offset_<supf>v4si): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_fv4sf): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_p_fv4sf): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_p_<supf>v4si): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_<supf>v4si): Likewise.
> 	(mve_vstrbq_scatter_offset_<supf><mode>_insn): Define insn for
> 	scatter stores.
> 	(mve_vstrbq_scatter_offset_p_<supf><mode>_insn): Likewise.
> 	(mve_vstrhq_scatter_offset_<supf><mode>_insn): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn): Likewise.
> 	(mve_vstrdq_scatter_offset_p_<supf>v2di_insn): Likewise.
> 	(mve_vstrdq_scatter_offset_<supf>v2di_insn): Likewise.
> 	(mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn): Likewise.
> 	(mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn): Likewise.
> 	(mve_vstrhq_scatter_offset_fv8hf_insn): Likewise.
> 	(mve_vstrhq_scatter_offset_p_fv8hf_insn): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_fv8hf_insn): Likewise.
> 	(mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn): Likewise.
> 	(mve_vstrwq_scatter_offset_fv4sf_insn): Likewise.
> 	(mve_vstrwq_scatter_offset_p_fv4sf_insn): Likewise.
> 	(mve_vstrwq_scatter_offset_p_<supf>v4si_insn): Likewise.
> 	(mve_vstrwq_scatter_offset_<supf>v4si_insn): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_fv4sf_insn): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn): Likewise.
> 	(mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2020-06-02  Srinath Parvathaneni  <srinath.parvathan...@arm.com>
>
> 	PR target/94735
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c: New test.
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c: Likewise.
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c: Likewise.
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c: Likewise.
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c:
> 	Likewise.
> 	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c:
> 	Likewise.
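A similar sketch for the predicated forms (again my own example, not taken
from the patch; the names are invented).  These expand to a vpst/vstrwt
pair, and in the new *_insn patterns the store is modelled as an unspec set
of (mem:BLK (scratch)), so the optimizers see a memory write and keep it:

/* Sketch only, not part of the patch.  */
#include "arm_mve.h"

void
scatter_sketch_p (int32_t *base, uint32x4_t offsets, int32x4_t values,
                  mve_pred16_t p)
{
  /* Only the lanes enabled by the predicate p are stored; as above, the
     store is the only observable effect.  */
  vstrwq_scatter_offset_p_s32 (base, offsets, values, p);
}

The new tests below check exactly this, counting the vstr*/vstr*t
instructions that must remain in the -O2 output.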
> > > ############### Attachment also inlined for ease of reply > ############### > > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index > 986fbfe2abae5f1e91e65f1ff5c84709c43c4617..3a57901bd5bcd770832d59dc7 > 7cd92b6d9b5ecb4 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -8102,22 +8102,29 @@ > ;; > ;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u] > ;; > -(define_insn "mve_vstrbq_scatter_offset_<supf><mode>" > - [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") > - (unspec:<MVE_B_ELEM> > - [(match_operand:MVE_2 1 "s_register_operand" "w") > - (match_operand:MVE_2 2 "s_register_operand" "w")] > - VSTRBSOQ)) > - ] > +(define_expand "mve_vstrbq_scatter_offset_<supf><mode>" > + [(match_operand:<MVE_B_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_2 1 "s_register_operand") > + (match_operand:MVE_2 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn("vstrb.<V_sz_elem>\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrbq_scatter_offset_<supf><mode>_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrbq_scatter_offset_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_2 1 "s_register_operand" "w") > + (match_operand:MVE_2 2 "s_register_operand" "w")] > + VSTRBSOQ))] > + "TARGET_HAVE_MVE" > + "vstrb.<V_sz_elem>\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > @@ -8210,23 +8217,33 @@ > ;; > ;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u] > ;; > -(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>" > - [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") > - (unspec:<MVE_B_ELEM> > - [(match_operand:MVE_2 1 "s_register_operand" "w") > - (match_operand:MVE_2 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRBSOQ)) > - ] > +(define_expand "mve_vstrbq_scatter_offset_p_<supf><mode>" > + [(match_operand:<MVE_B_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_2 1 "s_register_operand") > + (match_operand:MVE_2 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand" "Up") > + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrbq_scatter_offset_p_<supf><mode>_insn (ind, operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_2 1 "s_register_operand" "w") > + (match_operand:MVE_2 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRBSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrbt.<V_sz_elem>\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > @@ -9097,87 +9114,122 @@ > ;; > ;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u] > ;; > -(define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>" > - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") > - (unspec:<MVE_H_ELEM> > - 
[(match_operand:MVE_6 1 "s_register_operand" "w") > - (match_operand:MVE_6 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRHSOQ)) > - ] > +(define_expand "mve_vstrhq_scatter_offset_p_<supf><mode>" > + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_6 1 "s_register_operand") > + (match_operand:MVE_6 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrht.<V_sz_elem>\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrhq_scatter_offset_p_<supf><mode>_insn (ind, operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_6 1 "s_register_operand" "w") > + (match_operand:MVE_6 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRHSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrhq_scatter_offset_s vstrhq_scatter_offset_u] > ;; > -(define_insn "mve_vstrhq_scatter_offset_<supf><mode>" > - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") > - (unspec:<MVE_H_ELEM> > - [(match_operand:MVE_6 1 "s_register_operand" "w") > - (match_operand:MVE_6 2 "s_register_operand" "w")] > - VSTRHSOQ)) > - ] > +(define_expand "mve_vstrhq_scatter_offset_<supf><mode>" > + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_6 1 "s_register_operand") > + (match_operand:MVE_6 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrh.<V_sz_elem>\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrhq_scatter_offset_<supf><mode>_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_offset_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_6 1 "s_register_operand" "w") > + (match_operand:MVE_6 2 "s_register_operand" "w")] > + VSTRHSOQ))] > + "TARGET_HAVE_MVE" > + "vstrh.<V_sz_elem>\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u] > ;; > -(define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>" > - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Ux") > - (unspec:<MVE_H_ELEM> > - [(match_operand:MVE_6 1 "s_register_operand" "w") > - (match_operand:MVE_6 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRHSSOQ)) > - ] > +(define_expand "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>" > + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_6 1 "s_register_operand") > + (match_operand:MVE_6 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = 
operands[2]; > - output_asm_insn ("vpst\n\tvstrht.<V_sz_elem>\t%q2, [%m0, %q1, uxtw > #1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_6 1 "s_register_operand" "w") > + (match_operand:MVE_6 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRHSSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] > ;; > -(define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>" > - [(set (match_operand:<MVE_H_ELEM> 0 "memory_operand" "=Us") > - (unspec:<MVE_H_ELEM> > - [(match_operand:MVE_6 1 "s_register_operand" "w") > - (match_operand:MVE_6 2 "s_register_operand" "w")] > - VSTRHSSOQ)) > - ] > +(define_expand "mve_vstrhq_scatter_shifted_offset_<supf><mode>" > + [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") > + (match_operand:MVE_6 1 "s_register_operand") > + (match_operand:MVE_6 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrh.<V_sz_elem>\t%q2, [%m0, %q1, uxtw #1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:MVE_6 1 "s_register_operand" "w") > + (match_operand:MVE_6 2 "s_register_operand" "w")] > + VSTRHSSOQ))] > + "TARGET_HAVE_MVE" > + "vstrh.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" > [(set_attr "length" "4")]) > > ;; > @@ -9345,173 +9397,240 @@ > ;; > ;; [vstrdq_scatter_offset_p_s vstrdq_scatter_offset_p_u] > ;; > -(define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di" > - [(set (match_operand:V2DI 0 "memory_operand" "=Us") > - (unspec:V2DI > - [(match_operand:V2DI 1 "s_register_operand" "w") > - (match_operand:V2DI 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRDSOQ)) > - ] > +(define_expand "mve_vstrdq_scatter_offset_p_<supf>v2di" > + [(match_operand:V2DI 0 "mve_scatter_memory") > + (match_operand:V2DI 1 "s_register_operand") > + (match_operand:V2DI 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrdq_scatter_offset_p_<supf>v2di_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V2DI 1 "s_register_operand" "w") 
> + (match_operand:V2DI 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRDSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrdt.64\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrdq_scatter_offset_s vstrdq_scatter_offset_u] > ;; > -(define_insn "mve_vstrdq_scatter_offset_<supf>v2di" > - [(set (match_operand:V2DI 0 "memory_operand" "=Us") > - (unspec:V2DI > - [(match_operand:V2DI 1 "s_register_operand" "w") > - (match_operand:V2DI 2 "s_register_operand" "w")] > - VSTRDSOQ)) > - ] > +(define_expand "mve_vstrdq_scatter_offset_<supf>v2di" > + [(match_operand:V2DI 0 "mve_scatter_memory") > + (match_operand:V2DI 1 "s_register_operand") > + (match_operand:V2DI 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrdq_scatter_offset_<supf>v2di_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrdq_scatter_offset_<supf>v2di_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V2DI 1 "s_register_operand" "w") > + (match_operand:V2DI 2 "s_register_operand" "w")] > + VSTRDSOQ))] > + "TARGET_HAVE_MVE" > + "vstrd.64\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u] > ;; > -(define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" > - [(set (match_operand:V2DI 0 "memory_operand" "=Us") > - (unspec:V2DI > - [(match_operand:V2DI 1 "s_register_operand" "w") > - (match_operand:V2DI 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRDSSOQ)) > - ] > +(define_expand "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" > + [(match_operand:V2DI 0 "mve_scatter_memory") > + (match_operand:V2DI 1 "s_register_operand") > + (match_operand:V2DI 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1, UXTW #3]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V2DI 1 "s_register_operand" "w") > + (match_operand:V2DI 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRDSSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrdt.64\t%q2, [%0, %q1, UXTW #3]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u] > ;; > -(define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di" > - [(set (match_operand:V2DI 0 "memory_operand" "=Us") > - (unspec:V2DI > - [(match_operand:V2DI 1 "s_register_operand" "w") > - (match_operand:V2DI 2 "s_register_operand" "w")] > - VSTRDSSOQ)) > - ] > +(define_expand "mve_vstrdq_scatter_shifted_offset_<supf>v2di" > + [(match_operand:V2DI 0 
"mve_scatter_memory") > + (match_operand:V2DI 1 "s_register_operand") > + (match_operand:V2DI 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1, UXTW #3]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V2DI 1 "s_register_operand" "w") > + (match_operand:V2DI 2 "s_register_operand" "w")] > + VSTRDSSOQ))] > + "TARGET_HAVE_MVE" > + "vstrd.64\t%q2, [%0, %q1, UXTW #3]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrhq_scatter_offset_f] > ;; > -(define_insn "mve_vstrhq_scatter_offset_fv8hf" > - [(set (match_operand:V8HI 0 "memory_operand" "=Us") > - (unspec:V8HI > - [(match_operand:V8HI 1 "s_register_operand" "w") > - (match_operand:V8HF 2 "s_register_operand" "w")] > - VSTRHQSO_F)) > - ] > +(define_expand "mve_vstrhq_scatter_offset_fv8hf" > + [(match_operand:V8HI 0 "mve_scatter_memory") > + (match_operand:V8HI 1 "s_register_operand") > + (match_operand:V8HF 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrhq_scatter_offset_fv8hf_insn (ind, operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_offset_fv8hf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V8HI 1 "s_register_operand" "w") > + (match_operand:V8HF 2 "s_register_operand" "w")] > + VSTRHQSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vstrh.16\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrhq_scatter_offset_p_f] > ;; > -(define_insn "mve_vstrhq_scatter_offset_p_fv8hf" > - [(set (match_operand:V8HI 0 "memory_operand" "=Us") > - (unspec:V8HI > - [(match_operand:V8HI 1 "s_register_operand" "w") > - (match_operand:V8HF 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRHQSO_F)) > - ] > +(define_expand "mve_vstrhq_scatter_offset_p_fv8hf" > + [(match_operand:V8HI 0 "mve_scatter_memory") > + (match_operand:V8HI 1 "s_register_operand") > + (match_operand:V8HF 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrhq_scatter_offset_p_fv8hf_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_offset_p_fv8hf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V8HI 1 "s_register_operand" "w") > + 
(match_operand:V8HF 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRHQSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vpst\;vstrht.16\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrhq_scatter_shifted_offset_f] > ;; > -(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf" > - [(set (match_operand:V8HI 0 "memory_operand" "=Us") > - (unspec:V8HI > - [(match_operand:V8HI 1 "s_register_operand" "w") > - (match_operand:V8HF 2 "s_register_operand" "w")] > - VSTRHQSSO_F)) > - ] > +(define_expand "mve_vstrhq_scatter_shifted_offset_fv8hf" > + [(match_operand:V8HI 0 "memory_operand" "=Us") > + (match_operand:V8HI 1 "s_register_operand" "w") > + (match_operand:V8HF 2 "s_register_operand" "w") > + (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1, uxtw #1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrhq_scatter_shifted_offset_fv8hf_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V8HI 1 "s_register_operand" "w") > + (match_operand:V8HF 2 "s_register_operand" "w")] > + VSTRHQSSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vstrh.16\t%q2, [%0, %q1, uxtw #1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrhq_scatter_shifted_offset_p_f] > ;; > -(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf" > - [(set (match_operand:V8HI 0 "memory_operand" "=Us") > - (unspec:V8HI > - [(match_operand:V8HI 1 "s_register_operand" "w") > - (match_operand:V8HF 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRHQSSO_F)) > - ] > +(define_expand "mve_vstrhq_scatter_shifted_offset_p_fv8hf" > + [(match_operand:V8HI 0 "memory_operand" "=Us") > + (match_operand:V8HI 1 "s_register_operand" "w") > + (match_operand:V8HF 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up") > + (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1, uxtw #1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn (ind, operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V8HI 1 "s_register_operand" "w") > + (match_operand:V8HF 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRHQSSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]" > [(set_attr "length" "8")]) > > ;; > @@ -9562,173 +9681,240 @@ > ;; > ;; [vstrwq_scatter_offset_f] > ;; > -(define_insn "mve_vstrwq_scatter_offset_fv4sf" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SF 2 "s_register_operand" "w")] > - VSTRWQSO_F)) > - ] > 
+(define_expand "mve_vstrwq_scatter_offset_fv4sf" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SF 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrwq_scatter_offset_fv4sf_insn (ind, operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_offset_fv4sf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SF 2 "s_register_operand" "w")] > + VSTRWQSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vstrw.32\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrwq_scatter_offset_p_f] > ;; > -(define_insn "mve_vstrwq_scatter_offset_p_fv4sf" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SF 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRWQSO_F)) > - ] > +(define_expand "mve_vstrwq_scatter_offset_p_fv4sf" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SF 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrwq_scatter_offset_p_fv4sf_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_offset_p_fv4sf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SF 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRWQSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vpst\;vstrwt.32\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > -;; [vstrwq_scatter_offset_p_s vstrwq_scatter_offset_p_u] > +;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] > ;; > -(define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SI 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRWSOQ)) > - ] > +(define_expand "mve_vstrwq_scatter_offset_p_<supf>v4si" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SI 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn 
(gen_mve_vstrwq_scatter_offset_p_<supf>v4si_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SI 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRWSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrwt.32\t%q2, [%0, %q1]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] > ;; > -(define_insn "mve_vstrwq_scatter_offset_<supf>v4si" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SI 2 "s_register_operand" "w")] > - VSTRWSOQ)) > - ] > +(define_expand "mve_vstrwq_scatter_offset_<supf>v4si" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SI 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrwq_scatter_offset_<supf>v4si_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_offset_<supf>v4si_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SI 2 "s_register_operand" "w")] > + VSTRWSOQ))] > + "TARGET_HAVE_MVE" > + "vstrw.32\t%q2, [%0, %q1]" > [(set_attr "length" "4")]) > > ;; > ;; [vstrwq_scatter_shifted_offset_f] > ;; > -(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SF 2 "s_register_operand" "w")] > - VSTRWQSSO_F)) > - ] > +(define_expand "mve_vstrwq_scatter_shifted_offset_fv4sf" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SF 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); > - return ""; > -} > - [(set_attr "length" "4")]) > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn (gen_mve_vstrwq_scatter_shifted_offset_fv4sf_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SF 2 "s_register_operand" "w")] > + VSTRWQSSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" > + [(set_attr "length" "8")]) > > ;; > ;; [vstrwq_scatter_shifted_offset_p_f] > ;; > -(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SF 2 "s_register_operand" 
"w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRWQSSO_F)) > - ] > +(define_expand "mve_vstrwq_scatter_shifted_offset_p_fv4sf" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SF 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] > "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn (ind, operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SF 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRWQSSO_F))] > + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" > + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u] > ;; > -(define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SI 2 "s_register_operand" "w") > - (match_operand:HI 3 "vpr_register_operand" "Up")] > - VSTRWSSOQ)) > - ] > +(define_expand "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SI 2 "s_register_operand") > + (match_operand:HI 3 "vpr_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn (ind, > operands[1], > + operands[2], > + operands[3])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SI 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRWSSOQ))] > + "TARGET_HAVE_MVE" > + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" > [(set_attr "length" "8")]) > > ;; > ;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u] > ;; > -(define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si" > - [(set (match_operand:V4SI 0 "memory_operand" "=Us") > - (unspec:V4SI > - [(match_operand:V4SI 1 "s_register_operand" "w") > - (match_operand:V4SI 2 "s_register_operand" "w")] > - VSTRWSSOQ)) > - ] > +(define_expand "mve_vstrwq_scatter_shifted_offset_<supf>v4si" > + [(match_operand:V4SI 0 "mve_scatter_memory") > + (match_operand:V4SI 1 "s_register_operand") > + (match_operand:V4SI 2 "s_register_operand") > + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] > "TARGET_HAVE_MVE" > { > - rtx ops[3]; > - ops[0] = operands[0]; > - ops[1] = 
operands[1]; > - ops[2] = operands[2]; > - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); > - return ""; > -} > + rtx ind = XEXP (operands[0], 0); > + gcc_assert (REG_P (ind)); > + emit_insn ( > + gen_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn (ind, > operands[1], > + operands[2])); > + DONE; > +}) > + > +(define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:SI 0 "register_operand" "r") > + (match_operand:V4SI 1 "s_register_operand" "w") > + (match_operand:V4SI 2 "s_register_operand" "w")] > + VSTRWSSOQ))] > + "TARGET_HAVE_MVE" > + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" > [(set_attr "length" "4")]) > > ;; > diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md > index > c57ad73577e1eebebc8951ed5b4fb544dd3381f8..9e9bca4d87fdc31e045b2b5 > bb03b996f082079bd 100644 > --- a/gcc/config/arm/predicates.md > +++ b/gcc/config/arm/predicates.md > @@ -37,6 +37,12 @@ > && mve_vector_mem_operand (GET_MODE (op), XEXP (op, > 0), > false)"))) > > +(define_predicate "mve_scatter_memory" > + (and (match_code "mem") > + (match_test "TARGET_HAVE_MVE && REG_P (XEXP (op, 0)) > + && mve_vector_mem_operand (GET_MODE (op), XEXP (op, > 0), > + false)"))) > + > ;; True for immediates in the range of 1 to 16 for MVE. > (define_predicate "mve_imm_16" > (match_test "satisfies_constraint_Rd (op)")) > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..21b9e12d57e064688e6d52 > 493deffc1c2c39761d > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c > @@ -0,0 +1,67 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ > +/* { dg-add-options arm_v8_1m_mve_fp } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int > +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_s32 (pDataDest, 4, value); > + vstrwq_scatter_base_s32 (pDataDest, 132, value); > + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); > + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); > + return 0; > +} > + > +int > +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_u32 (pDataDest, 4, value); > + vstrwq_scatter_base_u32 (pDataDest, 132, value); > + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); > + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); > + return 0; > +} > + > +int > +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_f32 (pDataDest, 4, value); > + vstrwq_scatter_base_f32 (pDataDest, 132, value); > + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); > + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); > + return 0; > +} > + > +int > +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrdq_scatter_base_s64 (pDataDest, 256, value); > + vstrdq_scatter_base_s64 
(pDataDest, 512, value); > + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); > + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); > + return 0; > +} > + > +int > +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrdq_scatter_base_u64 (pDataDest, 256, value); > + vstrdq_scatter_base_u64 (pDataDest, 512, value); > + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); > + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..15c6496732a31259ebcceeb > eb8ac65e071a04b20 > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c > @@ -0,0 +1,69 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ > +/* { dg-add-options arm_v8_1m_mve_fp } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +mve_pred16_t __p; > + > +int > +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_p_s32 (pDataDest, 4, value, __p); > + vstrwq_scatter_base_p_s32 (pDataDest, 132, value, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); > + return 0; > +} > + > +int > +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_p_u32 (pDataDest, 4, value, __p); > + vstrwq_scatter_base_p_u32 (pDataDest, 132, value, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); > + return 0; > +} > + > +int > +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrwq_scatter_base_p_f32 (pDataDest, 4, value, __p); > + vstrwq_scatter_base_p_f32 (pDataDest, 132, value, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); > + return 0; > +} > + > +int > +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrdq_scatter_base_p_s64 (pDataDest, 256, value, __p); > + vstrdq_scatter_base_p_s64 (pDataDest, 512, value, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); > + return 0; > +} > + > +int > +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + vstrdq_scatter_base_p_u64 (pDataDest, 256, value, __p); > + vstrdq_scatter_base_p_u64 (pDataDest, 512, value, __p); > + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); > + 
vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..6d123669c13f168e651b7aa > 3344c4324fd4afe50 > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c > @@ -0,0 +1,215 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ > +/* { dg-add-options arm_v8_1m_mve_fp } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int > +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest) > +{ > + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, > 15, 8, > 14}; > + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, > 17, 22, > 16, 20, 18, 30}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]); > + vstrbq_scatter_offset_u8 (pDataDest, vecOffs1, (uint8x16_t) vecIn1); > + vstrbq_scatter_offset_u8 (pDataDest, vecOffs2, (uint8x16_t) vecIn2); > + pDataDest[32] = pDataSrc[32]; > + return 0; > +} > + > +int > +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest) > +{ > + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; > + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); > + vstrbq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1); > + vstrbq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); > + pDataDest[16] = pDataSrc[16]; > + return 0; > +} > + > +int > +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); > + vstrbq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); > + vstrbq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foobs8( int8_t * pDataSrc, int8_t * pDataDest) > +{ > + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, > 15, 8, > 14}; > + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, > 17, 22, > 16, 20, 18, 30}; > + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); > + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]); > + vstrbq_scatter_offset_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1); > + vstrbq_scatter_offset_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2); > + pDataDest[32] = pDataSrc[32]; > + return 0; > +} > + > +int > +foobs16( int8_t * pDataSrc, int8_t * pDataDest) > +{ > + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; > + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; > + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); > + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); > + vstrbq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); > + vstrbq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); > + pDataDest[16] = pDataSrc[16]; > + return 0; > +} > + > +int > +foobs32( uint8_t * pDataSrc, int8_t * pDataDest) > +{ > + 
const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); > + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); > + vstrbq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); > + vstrbq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest) > +{ > + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; > + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); > + vstrhq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1); > + vstrhq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); > + pDataDest[16] = pDataSrc[16]; > + return 0; > +} > + > +int > +foohu32( uint16_t * pDataSrc, uint16_t * pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); > + vstrhq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); > + vstrhq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foohs16( int16_t * pDataSrc, int16_t * pDataDest) > +{ > + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; > + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; > + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); > + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); > + vstrhq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); > + vstrhq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); > + pDataDest[16] = pDataSrc[16]; > + return 0; > +} > + > +int > +foohs32( uint16_t * pDataSrc, int16_t * pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); > + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); > + vstrhq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); > + vstrhq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foohf16( float16_t * pDataSrc, float16_t * pDataDest) > +{ > + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; > + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); > + vstrhq_scatter_offset_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1); > + vstrhq_scatter_offset_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2); > + pDataDest[16] = pDataSrc[16]; > + return 0; > +} > + > +int > +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); > + vstrwq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); > + vstrwq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foows32( int32_t * pDataSrc, int32_t 
* pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); > + vstrwq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); > + vstrwq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foowf32( float32_t * pDataSrc, float32_t * pDataDest) > +{ > + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; > + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); > + vstrwq_scatter_offset_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1); > + vstrwq_scatter_offset_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2); > + pDataDest[8] = pDataSrc[8]; > + return 0; > +} > + > +int > +foowu64( uint64_t * pDataSrc, uint64_t * pDataDest) > +{ > + const uint64x2_t vecOffs1 = { 0, 3}; > + const uint64x2_t vecOffs2 = { 1, 2}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); > + vstrdq_scatter_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1); > + vstrdq_scatter_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2); > + pDataDest[4] = pDataSrc[4]; > + return 0; > +} > + > +int > +foows64( int64_t * pDataSrc, int64_t * pDataDest) > +{ > + const uint64x2_t vecOffs1 = { 0, 3}; > + const uint64x2_t vecOffs2 = { 1, 2}; > + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); > + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); > + vstrdq_scatter_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1); > + vstrdq_scatter_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2); > + pDataDest[4] = pDataSrc[4]; > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 32 } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p. > c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p. > c > new file mode 100644 > index > 0000000000000000000000000000000000000000..cd2e1ee80f9dfe35955468a > 822bd202679039831 > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p. 
> c
> @@ -0,0 +1,216 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +mve_pred16_t __p;
> +int
> +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest)
> +{
> +  const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
> +  const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]);
> +  vstrbq_scatter_offset_p_u8(pDataDest, vecOffs1, (uint8x16_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_u8(pDataDest, vecOffs2, (uint8x16_t) vecIn2, __p);
> +  pDataDest[32] = pDataSrc[32];
> +  return 0;
> +}
> +
> +int
> +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
> +  vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
> +  vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foobs8( int8_t * pDataSrc, int8_t * pDataDest)
> +{
> +  const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
> +  const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]);
> +  vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2, __p);
> +  pDataDest[32] = pDataSrc[32];
> +  return 0;
> +}
> +
> +int
> +foobs16( int8_t * pDataSrc, int8_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]);
> +  vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foobs32( uint8_t * pDataSrc, int8_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
> +  vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
> +  vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p);
> +  vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohu32( uint16_t * pDataSrc, uint16_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
> +  vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
> +  vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohs16( int16_t * pDataSrc, int16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p);
> +  vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohs32( uint16_t * pDataSrc, int16_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
> +  vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
> +  vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohf16( float16_t * pDataSrc, float16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1, __p);
> +  vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
> +  vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
> +  vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foows32( int32_t * pDataSrc, int32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
> +  vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
> +  vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foowf32( float32_t * pDataSrc, float32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
> +  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1, __p);
> +  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foowu64( uint64_t * pDataSrc, uint64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 3};
> +  const uint64x2_t vecOffs2 = { 1, 2};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
> +  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
> +  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
> +  pDataDest[4] = pDataSrc[4];
> +  return 0;
> +}
> +
> +int
> +foows64( int64_t * pDataSrc, int64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 3};
> +  const uint64x2_t vecOffs2 = { 1, 2};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
> +  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
> +  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
> +  pDataDest[4] = pDataSrc[4];
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 32 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..62dfb450a6d30312472f5c8bb2d41e98fe6b6a32
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
> @@ -0,0 +1,141 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int
> +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
> +  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs1, vecIn1);
> +  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foowf32( float32_t * pDataSrc, float32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  float32x4_t vecIn1 = vldrwq_f32 ((float32_t const *) pDataSrc);
> +  float32x4_t vecIn2 = vldrwq_f32 ((float32_t const *) &pDataSrc[4]);
> +  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs1, vecIn1);
> +  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  uint16x8_t vecIn1 = vldrhq_u16 ((uint16_t const *) pDataSrc);
> +  uint16x8_t vecIn2 = vldrhq_u16 ((uint16_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs1, vecIn1);
> +  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrhq_u32 ((uint16_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrhq_u32 ((uint16_t const *) &pDataSrc[4]);
> +  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1);
> +  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohf16( float16_t * pDataSrc, float16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  float16x8_t vecIn1 = vldrhq_f16 ((float16_t const *) pDataSrc);
> +  float16x8_t vecIn2 = vldrhq_f16 ((float16_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs1, vecIn1);
> +  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 1};
> +  const uint64x2_t vecOffs2 = { 2, 3};
> +  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
> +  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
> +
> +  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1);
> +  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2);
> +
> +  pDataDest[2] = pDataSrc[2];
> +  return 0;
> +}
> +
> +int
> +foows32( int32_t * pDataSrc, int32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
> +  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs1, vecIn1);
> +  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohs16( int16_t * pDataSrc, int16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  int16x8_t vecIn1 = vldrhq_s16 ((int16_t const *) pDataSrc);
> +  int16x8_t vecIn2 = vldrhq_s16 ((int16_t const *) &pDataSrc[8]);
> +  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs1, vecIn1);
> +  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs2, vecIn2);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohs32( int32_t * pDataSrc, int32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrhq_s32 ((int16_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrhq_s32 ((int16_t const *) &pDataSrc[4]);
> +  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1);
> +  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foods64( int64_t * pDataSrc, int64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 1};
> +  const uint64x2_t vecOffs2 = { 2, 3};
> +  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
> +  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[2]);
> +
> +  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1);
> +  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2);
> +
> +  pDataDest[2] = pDataSrc[2];
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a51d3a211672e74e99f571ef362445d13f2e2368
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
> @@ -0,0 +1,142 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +mve_pred16_t __p;
> +int
> +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
> +  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[4], __p);
> +  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foowf32( float32_t * pDataSrc, float32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  float32x4_t vecIn1 = vldrwq_z_f32 ((float32_t const *) pDataSrc, __p);
> +  float32x4_t vecIn2 = vldrwq_z_f32 ((float32_t const *) &pDataSrc[4], __p);
> +  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  uint16x8_t vecIn1 = vldrhq_z_u16 ((uint16_t const *) pDataSrc, __p);
> +  uint16x8_t vecIn2 = vldrhq_z_u16 ((uint16_t const *) &pDataSrc[8], __p);
> +  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  uint32x4_t vecIn1 = vldrhq_z_u32 ((uint16_t const *) pDataSrc, __p);
> +  uint32x4_t vecIn2 = vldrhq_z_u32 ((uint16_t const *) &pDataSrc[4], __p);
> +  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1, __p);
> +  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohf16( float16_t * pDataSrc, float16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  float16x8_t vecIn1 = vldrhq_z_f16 ((float16_t const *) pDataSrc, __p);
> +  float16x8_t vecIn2 = vldrhq_z_f16 ((float16_t const *) &pDataSrc[8], __p);
> +  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 1};
> +  const uint64x2_t vecOffs2 = { 2, 3};
> +  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
> +  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[2], __p);
> +
> +  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
> +  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
> +
> +  pDataDest[2] = pDataSrc[2];
> +  return 0;
> +}
> +
> +int
> +foows32( int32_t * pDataSrc, int32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
> +  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[4], __p);
> +  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foohs16( int16_t * pDataSrc, int16_t * pDataDest)
> +{
> +  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
> +  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
> +  int16x8_t vecIn1 = vldrhq_z_s16 ((int16_t const *) pDataSrc, __p);
> +  int16x8_t vecIn2 = vldrhq_z_s16 ((int16_t const *) &pDataSrc[8], __p);
> +  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs1, vecIn1, __p);
> +  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[16] = pDataSrc[16];
> +  return 0;
> +}
> +
> +int
> +foohs32( int32_t * pDataSrc, int32_t * pDataDest)
> +{
> +  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
> +  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
> +  int32x4_t vecIn1 = vldrhq_z_s32 ((int16_t const *) pDataSrc, __p);
> +  int32x4_t vecIn2 = vldrhq_z_s32 ((int16_t const *) &pDataSrc[4], __p);
> +  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1, __p);
> +  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2, __p);
> +  pDataDest[8] = pDataSrc[8];
> +  return 0;
> +}
> +
> +int
> +foods64( int64_t * pDataSrc, int64_t * pDataDest)
> +{
> +  const uint64x2_t vecOffs1 = { 0, 1};
> +  const uint64x2_t vecOffs2 = { 2, 3};
> +  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
> +  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[2], __p);
> +
> +  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
> +  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
> +
> +  pDataDest[2] = pDataSrc[2];
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */
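For the archives, a minimal sketch of the shape these tests guard (function and
variable names below are mine, not from the patch): the scatter store is the
function's only observable effect, so the scan-assembler counts above only pass
if the store survives -O2.

/* Hypothetical reduced example; build at -O2 with MVE enabled,
   e.g. -march=armv8.1-m.main+mve.fp -mfloat-abi=hard.  */
#include "arm_mve.h"

void
scatter_copy4 (uint32_t * pDataDest, uint32_t const * pDataSrc)
{
  const uint32x4_t vecOffs = { 0, 3, 6, 1 };
  uint32x4_t vecIn = vldrwq_u32 (pDataSrc);
  /* The vstrw scatter store below is the only side effect here;
     nothing else keeps it alive, so it must not be optimised away.  */
  vstrwq_scatter_offset_u32 (pDataDest, vecOffs, vecIn);
}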