Hi All,

Updating this patch with the feedback I've received from patch 1/4.

Thanks,
Tamar
________________________________________
From: gcc-patches-ow...@gcc.gnu.org <gcc-patches-ow...@gcc.gnu.org> on behalf of Tamar Christina <tamar.christ...@arm.com>
Sent: Wednesday, June 7, 2017 12:38:37 PM
To: GCC Patches
Cc: nd; James Greenhalgh; Marcus Shawcroft; Richard Earnshaw
Subject: [PATCH][GCC][AArch64] optimize float immediate moves (2/4) - HF/DF/SF mode.

Hi All,

This patch adds support for creating floating-point constants using mov
immediate instructions.  The movi SIMD instruction can be used for HFmode
and SFmode constants; e.g. for -0.0f we generate:

	movi	v0.2s, 0x80, lsl 24

More complex constants are generated using an integer MOV or MOV+MOVK
followed by an fmov into the FP register:

	mov	w0, 48128
	movk	w0, 0x47f0, lsl 16
	fmov	s0, w0

We allow up to three instructions, since this lets all HF and SF constants
and most DF constants be generated without a literal load, and is overall
best for code size.

Regression tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar

gcc/
2017-06-07  Tamar Christina  <tamar.christ...@arm.com>

	* config/aarch64/aarch64.md (mov<mode>): Generalize.
	(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64): Add integer and
	movi cases.
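As a concrete illustration of the two new paths (illustrative only: the asm
for neg_zero is the sequence quoted above, while the constant in via_gpr is
my decoding of the mov/movk immediates quoted above, 0x47f0bc00 == 123256.0f;
treat the function names and that value as assumptions, not test results):

/* Encodable as a SIMD movi immediate.  */
float
neg_zero (void)
{
  /* movi	v0.2s, 0x80, lsl 24  */
  return -0.0f;
}

/* Not fmov/movi-encodable, so built in a GPR and moved across.  */
float
via_gpr (void)
{
  /* mov	w0, 48128
     movk	w0, 0x47f0, lsl 16
     fmov	s0, w0  */
  return 123256.0f;
}
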
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..62ad76731d2c5b4b3d02def8c2b1457c713c2d8e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1167,66 +1167,120 @@
 }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
+(define_insn_and_split "*movhf_aarch64"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+    || aarch64_reg_or_fp_float (operands[1], HFmode))"
   "@
    movi\\t%0.4h, #0
-   mov\\t%0.h[0], %w1
+   fmov\\t%s0, %w1
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
+   fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%h0, %1
    str\\t%h1, %0
    ldrh\\t%w0, %1
   strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
-		     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+  "&& can_create_pseudo_p ()
+     && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
+     && !aarch64_float_const_representable_p (operands[1])
+     && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
+    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+		     neon_move,f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
 )
 
-(define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
-	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movsf_aarch64"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
-    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+    || aarch64_reg_or_fp_float (operands[1], SFmode))"
   "@
    movi\\t%0.2s, #0
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%s0, %1
    str\\t%s1, %0
    ldr\\t%w0, %1
    str\\t%w1, %0
-   mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
-		     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   mov\\t%w0, %w1
+   mov\\t%w0, %1"
+  "&& can_create_pseudo_p ()
+     && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
+     && !aarch64_float_const_representable_p (operands[1])
+     && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+    emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+		     f_loads,f_stores,load1,store1,mov_reg,\
+		     fconsts")
+   (set_attr "simd"
+	     "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
 
-(define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
-	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movdf_aarch64"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
-    || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+    || aarch64_reg_or_fp_float (operands[1], DFmode))"
   "@
    movi\\t%d0, #0
    fmov\\t%d0, %x1
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    fmov\\t%d0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
    ldr\\t%d0, %1
    str\\t%d1, %0
    ldr\\t%x0, %1
    str\\t%x1, %0
-   mov\\t%x0, %x1"
-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-		     f_loadd,f_stored,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   mov\\t%x0, %x1
+   mov\\t%x0, %1"
+  "&& can_create_pseudo_p ()
+     && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
+     && !aarch64_float_const_representable_p (operands[1])
+     && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (DImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, DImode));
+    emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+		     f_loadd,f_stored,load1,store1,mov_reg,\
+		     fconstd")
+   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
 
 (define_insn "*movtf_aarch64"