Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> This rewrites the simd MOV patterns to use the new compact syntax.
> No change in semantics is expected.  This will be needed in follow-on
> patches.
>
> This also merges the splits into the define_insn, which will also be
> needed soon.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
> 	Rewrite to new syntax.
> 	(*aarch64_simd_mov<VQMOV:mode>): Rewrite to new syntax and merge in
> 	splits.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e955691f1be8830efacc237465119764ce2a4942..7b4d5a37a9795fefda785aaacc246918826ed0a2 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -143,54 +143,57 @@ (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
>  )
>  
>  (define_insn "*aarch64_simd_mov<VDMOV:mode>"
> -  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
> -		"=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
> -	(match_operand:VDMOV 1 "general_operand"
> -		"m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
> +  [(set (match_operand:VDMOV 0 "nonimmediate_operand")
> +	(match_operand:VDMOV 1 "general_operand"))]
>    "TARGET_FLOAT
>     && (register_operand (operands[0], <MODE>mode)
>         || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> -  "@
> -   ldr\t%d0, %1
> -   ldr\t%x0, %1
> -   str\txzr, %0
> -   str\t%d1, %0
> -   str\t%x1, %0
> -   * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
> -   * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
> -   fmov\t%d0, %1
> -   mov\t%0, %1
> -   * return aarch64_output_simd_mov_immediate (operands[1], 64);
> -   fmov\t%d0, xzr"
> -  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
> -		     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
> -		     mov_reg, neon_move<q>, f_mcr")
> -   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
> -)
> -
> -(define_insn "*aarch64_simd_mov<VQMOV:mode>"
> -  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
> -		"=w, Umn, m, w, ?r, ?w, ?r, w, w")
> -	(match_operand:VQMOV 1 "general_operand"
> -		"m, Dz, w, w, w, r, r, Dn, Dz"))]
> +  {@ [cons: =0, 1; attrs: type, arch]
> +     [w , m ; neon_load1_1reg<q> , *   ] ldr\t%d0, %1
> +     [r , m ; load_8             , *   ] ldr\t%x0, %1
> +     [m , Dz; store_8            , *   ] str\txzr, %0
> +     [m , w ; neon_store1_1reg<q>, *   ] str\t%d1, %0
> +     [m , r ; store_8            , *   ] str\t%x1, %0
> +     [w , w ; neon_logic<q>      , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
> +     [w , w ; neon_logic<q>      , *   ] fmov\t%d0, %d1
> +     [?r, w ; neon_to_gp<q>      , simd] umov\t%0, %1.d[0]
> +     [?r, w ; neon_to_gp<q>      , *   ] fmov\t%x0, %d1
> +     [?w, r ; f_mcr              , *   ] fmov\t%d0, %1
> +     [?r, r ; mov_reg            , *   ] mov\t%0, %1
> +     [w , Dn; neon_move<q>       , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
> +     [w , Dz; f_mcr              , *   ] fmov\t%d0, xzr
> +  }
> +)
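(A note for anyone reading along who hasn't seen the compact syntax
yet: each bracketed row pairs one alternative's constraints with its
per-alternative attribute values and output template, so the old
parallel "@" template and comma-separated set_attr lists collapse into
one line per alternative.  A minimal made-up pattern, just to show the
shape, not something from this patch or meant for the port:

  (define_insn "*example_mov"
    [(set (match_operand:DI 0 "nonimmediate_operand")
	  (match_operand:DI 1 "general_operand"))]
    ""
    {@ [cons: =0, 1; attrs: type]
       [r, r; mov_reg] mov\t%x0, %x1
       [r, m; load_8 ] ldr\t%x0, %1
       [m, r; store_8] str\t%x1, %0
    }
  )

Rows whose output starts with "<<" run C code that returns the
template, replacing the old "* return ..." spelling; that is what the
Dn alternatives above use.)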
> +
> +(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
> +  [(set (match_operand:VQMOV 0 "nonimmediate_operand")
> +	(match_operand:VQMOV 1 "general_operand"))]
>    "TARGET_FLOAT
>     && (register_operand (operands[0], <MODE>mode)
>         || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> -  "@
> -   ldr\t%q0, %1
> -   stp\txzr, xzr, %0
> -   str\t%q1, %0
> -   mov\t%0.<Vbtype>, %1.<Vbtype>
> -   #
> -   #
> -   #
> -   * return aarch64_output_simd_mov_immediate (operands[1], 128);
> -   fmov\t%d0, xzr"
> -  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
> -		     neon_logic<q>, multiple, multiple,\
> -		     multiple, neon_move<q>, fmov")
> -   (set_attr "length" "4,4,4,4,8,8,8,4,4")
> -   (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
> +  {@ [cons: =0, 1; attrs: type, arch, length]
> +     [w  , m ; neon_load1_1reg<q> , *   , 4] ldr\t%q0, %1
> +     [Umn, Dz; store_16           , *   , 4] stp\txzr, xzr, %0
> +     [m  , w ; neon_store1_1reg<q>, *   , 4] str\t%q1, %0
> +     [w  , w ; neon_logic<q>      , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
> +     [?r , w ; multiple           , *   , 8] #
> +     [?w , r ; multiple           , *   , 8] #
> +     [?r , r ; multiple           , *   , 8] #
> +     [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
> +     [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
> +  }
> +  "&& reload_completed
> +   && !(FP_REGNUM_P (REGNO (operands[0]))
> +	&& FP_REGNUM_P (REGNO (operands[1])))"

Won't this also trigger for the load, store, and Dn alternatives?
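Something along these lines would restrict the split to the
register-to-register alternatives (just a sketch, untested):

  "&& reload_completed
   && REG_P (operands[0])
   && REG_P (operands[1])
   && !(FP_REGNUM_P (REGNO (operands[0]))
	&& FP_REGNUM_P (REGNO (operands[1])))"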
Looks OK otherwise.

Thanks,
Richard

> +  [(const_int 0)]
> +  {
> +    if (GP_REGNUM_P (REGNO (operands[0]))
> +	&& GP_REGNUM_P (REGNO (operands[1])))
> +      aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> +    else
> +      aarch64_split_simd_move (operands[0], operands[1]);
> +    DONE;
> +  }
>  )
>  
>  ;; When storing lane zero we can use the normal STR and its more permissive
> @@ -276,33 +279,6 @@ (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
>    [(set_attr "type" "neon_stp_q")]
>  )
>  
> -
> -(define_split
> -  [(set (match_operand:VQMOV 0 "register_operand" "")
> -	(match_operand:VQMOV 1 "register_operand" ""))]
> -  "TARGET_FLOAT
> -   && reload_completed
> -   && GP_REGNUM_P (REGNO (operands[0]))
> -   && GP_REGNUM_P (REGNO (operands[1]))"
> -  [(const_int 0)]
> -{
> -  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> -  DONE;
> -})
> -
> -(define_split
> -  [(set (match_operand:VQMOV 0 "register_operand" "")
> -	(match_operand:VQMOV 1 "register_operand" ""))]
> -  "TARGET_FLOAT
> -   && reload_completed
> -   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
> -       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
> -  [(const_int 0)]
> -{
> -  aarch64_split_simd_move (operands[0], operands[1]);
> -  DONE;
> -})
> -
>  (define_expand "@aarch64_split_simd_mov<mode>"
>    [(set (match_operand:VQMOV 0)
>  	(match_operand:VQMOV 1))]
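P.S. For anyone wondering what the merged split actually emits: the
body reproduces what the two removed define_splits did.  In the
GP-to-GP case the 128-bit value occupies a pair of X registers and is
copied as two DImode moves, so e.g. a V4SI copy from x2/x3 into x0/x1
ends up as (illustrative output only):

	mov	x0, x2
	mov	x1, x3

aarch64_simd_emit_reg_reg_move picks the order of the two moves so
that overlapping source and destination pairs still copy correctly,
while FP/GP crossings go through aarch64_split_simd_move as before.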