On 16 May 2012 14:51, Christophe Lyon <christophe.l...@st.com> wrote: > On 11.05.2012 16:48, Ramana Radhakrishnan wrote: >> >> I would change the iterator from VQX to VQ in the pattern above (you >> can also simplify the setting of neon_type in that case as well as >> change that to be a vec_duplicate as below and get rid of any >> lingering definitions of UNSPEC_VLD1_DUP if they exist), define a >> separate pattern that expressed this as a define_insn_and_split as >> below. >> >> (define_insn_and_split "neon_vld1_dupv2di" >> [(set (match_operand:V2DI 0 "s_register_operand" "=w") >> (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" >> "Um")))] >> "TARGET_NEON" >> "#" >> "&& reload_completed" >> [(const_int 0)] >> { >> rtx tmprtx = gen_lowpart (DImode, operands[0]); >> emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); >> emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); >> DONE; >> } >> (set_attr "length" "8") >> (set_attr "neon_type" "<fromearlierpattern">) >> ) >> >> Do you want to try this and see what you get ? > > > Thanks for this example and suggestion, it does work. > > >> I'd rather have an extra regression test in gcc.target/arm that was a run >> time test. for e.g. take a look at gcc.target/arm/neon-vadds64.c . > > > Here is an updated patch:
I tried applying your patch but ran into trouble: `patch` would not accept it. My suspicion is that the mailer is munging whitespace in some form. Could you please send the patch as an attachment rather than inline in your mail? Regards, Ramana > 2012-05-16 Christophe Lyon <christophe.l...@st.com> > > * gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ > operands. > (neon_vld1_dupv2di): New, fixes vld1q_dup_s64. > * gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test. > > Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c > =================================================================== > --- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0) > +++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0) > @@ -0,0 +1,24 @@ > +/* Test the `vld1q_s64' ARM Neon intrinsic. */ > + > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O0" } */ > +/* { dg-add-options arm_neon } */ > + > +#include "arm_neon.h" > +#include <stdlib.h> > + > +int main (void) > +{ > + int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL, > + (int64x1_t)0x89abcdeffedcba90LL}; > + int64x1_t output[2] = {0, 0}; > + int64x2_t var = vld1q_dup_s64(input); > + > + vst1q_s64(output, var); > + if (output[0] != (int64x1_t)0x0123456776543210LL) > + abort(); > + if (output[1] != (int64x1_t)0x0123456776543210LL) > + abort(); > + return 0; > +} > Index: gcc/config/arm/neon.md > =================================================================== > --- gcc.orig/gcc/config/arm/neon.md (revision 2659) > +++ gcc.new/gcc/config/arm/neon.md (working copy) > @@ -4195,20 +4195,32 @@ > ) > > (define_insn "neon_vld1_dup<mode>" > - [(set (match_operand:VQX 0 "s_register_operand" "=w") > - (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] > + [(set (match_operand:VQ 0 "s_register_operand" "=w") > + (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] > UNSPEC_VLD1_DUP))] > "TARGET_NEON" > { > - if 
(GET_MODE_NUNITS (<MODE>mode) > 2) > > return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; > - else > > - return "vld1.<V_sz_elem>\t%h0, %A1"; > } > [(set (attr "neon_type") > - (if_then_else (gt (const_string "<V_mode_nunits>") (const_string > "1")) > - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") > - (const_string "neon_vld1_1_2_regs")))] > + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))] > +) > + > +(define_insn_and_split "neon_vld1_dupv2di" > + [(set (match_operand:V2DI 0 "s_register_operand" "=w") > + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] > + "TARGET_NEON" > + "#" > + "&& reload_completed" > + [(const_int 0)] > + { > + rtx tmprtx = gen_lowpart (DImode, operands[0]); > + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); > + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); > + DONE; > + } > + [(set_attr "length" "8") > + (set (attr "neon_type") (const_string > "neon_vld2_2_regs_vld1_vld2_all_lanes"))] > ) > > (define_expand "vec_store_lanes<mode><mode>" > > >