Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> Hi Richard,
>
>> The idea was that, if we did the split during expand, the movsf/df
>> define_insns would then only accept the immediates that their
>> constraints can handle.
>
> Right, always disallowing these immediates works fine too (it seems
> reload doesn't require all immediates to be valid), and then the split
> is redundant.  I've updated the patch:
>
>
> v2: split during expand, remove movsf/df splitter
>
> The IRA combine_and_move pass runs if the scheduler is disabled and
> aggressively combines moves.  The movsf/df patterns allow all FP
> immediates since they rely on a split pattern.  However splits do not
> happen during IRA, so the result is extra literal loads.  To avoid
> this, split early during expand and block creation of FP immediates
> that need this split.
>
> double f(void) { return 128.0; }
>
> -O2 -fno-schedule-insns gives:
>
>         adrp    x0, .LC0
>         ldr     d0, [x0, #:lo12:.LC0]
>         ret
>
> After patch:
>
>         mov     x0, 4638707616191610880
>         fmov    d0, x0
>         ret
>
> Passes bootstrap & regress, OK for commit?
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.md (movhf_aarch64): Use
>         aarch64_valid_fp_move.
>         (movsf_aarch64): Likewise.
>         (movdf_aarch64): Likewise.
>         * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function.
>         * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise.
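For reference, the immediate in the post-patch sequence above is just the
IEEE-754 bit pattern of 128.0 (0x4060000000000000), which is essentially
what aarch64_reinterpret_float_as_int extracts in the expander change
below.  A minimal standalone C check of that correspondence (not part of
the patch):

  #include <stdio.h>
  #include <stdint.h>
  #include <string.h>

  int
  main (void)
  {
    double d = 128.0;
    uint64_t bits;
    /* Reinterpret the double's storage as a 64-bit integer.  */
    memcpy (&bits, &d, sizeof bits);
    /* Prints 4638707616191610880, i.e. 0x4060000000000000.  */
    printf ("%llu\n", (unsigned long long) bits);
    return 0;
  }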
Thanks, this mostly looks good, but...

> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 05d3258abf7b43342d9058dd1b365a1a0870cdc2..6da81556110c978a9de6f6fad5775c9d77771b10 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -843,6 +843,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode);
>  opt_machine_mode aarch64_vq_mode (scalar_mode);
>  opt_machine_mode aarch64_full_sve_mode (scalar_mode);
>  bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
> +bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
>  bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
>  bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
>                                              HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 00bcf18ae97cea94227c00798b7951daa255d360..ec2d391d1e3eb9bd28f66fb6ee85311b4ced4c94 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -11175,6 +11175,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
>    return aarch64_simd_valid_mov_imm (v_op);
>  }
>
> +/* Return TRUE if DST and SRC with mode MODE is a valid fp move.  */
> +bool
> +aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
> +{
> +  if (!TARGET_FLOAT)
> +    return false;
> +
> +  if (aarch64_reg_or_fp_zero (src, mode))
> +    return true;
> +
> +  if (!register_operand (dst, mode))
> +    return false;
> +
> +  if (MEM_P (src))
> +    return true;
> +
> +  if (!DECIMAL_FLOAT_MODE_P (mode))
> +    {
> +      if (aarch64_can_const_movi_rtx_p (src, mode)
> +          || aarch64_float_const_representable_p (src)
> +          || aarch64_float_const_zero_rtx_p (src))
> +        return true;
> +
> +      /* Block FP immediates which are split during expand.  */
> +      if (aarch64_float_const_rtx_p (src))
> +        return false;
> +    }
> +
> +  return can_create_pseudo_p ();

...I still think we should avoid testing can_create_pseudo_p.
Does it work with the last part replaced by:

  if (!DECIMAL_FLOAT_MODE_P (mode))
    {
      if (aarch64_can_const_movi_rtx_p (src, mode)
          || aarch64_float_const_representable_p (src)
          || aarch64_float_const_zero_rtx_p (src))
        return true;
    }

  return false;

? If so, the patch is OK with that change from my POV, but please wait
till Thursday morning for others' opinions.

Richard

> +}
>
>  /* Return the fixed registers used for condition codes.  */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 8d10197c9e8dd66b7a30a1034b629297b9992661..b865ae2ff5e23edc4d8990e1efd4a51dd195f41f 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1636,14 +1636,33 @@ (define_expand "mov<mode>"
>         && ! (GET_CODE (operands[1]) == CONST_DOUBLE
>               && aarch64_float_const_zero_rtx_p (operands[1])))
>        operands[1] = force_reg (<MODE>mode, operands[1]);
> +
> +    if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
> +        && GET_CODE (operands[1]) == CONST_DOUBLE
> +        && can_create_pseudo_p ()
> +        && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> +        && !aarch64_float_const_representable_p (operands[1])
> +        && !aarch64_float_const_zero_rtx_p (operands[1])
> +        && aarch64_float_const_rtx_p (operands[1]))
> +      {
> +        unsigned HOST_WIDE_INT ival;
> +        bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
> +        gcc_assert (res);
> +
> +        machine_mode intmode
> +          = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
> +        rtx tmp = gen_reg_rtx (intmode);
> +        emit_move_insn (tmp, gen_int_mode (ival, intmode));
> +        emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> +        DONE;
> +      }
>    }
> )
>
>  (define_insn "*mov<mode>_aarch64"
>    [(set (match_operand:HFBF 0 "nonimmediate_operand")
>          (match_operand:HFBF 1 "general_operand"))]
> -  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> -    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> +  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
>    {@ [ cons: =0 , 1   ; attrs: type , arch  ]
>       [ w        , Y   ; neon_move   , simd  ] movi\t%0.4h, #0
>       [ w        , ?rY ; f_mcr       , fp16  ] fmov\t%h0, %w1
> @@ -1666,8 +1685,7 @@ (define_insn "*mov<mode>_aarch64"
>  (define_insn "*mov<mode>_aarch64"
>    [(set (match_operand:SFD 0 "nonimmediate_operand")
>          (match_operand:SFD 1 "general_operand"))]
> -  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> -    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> +  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
>    {@ [ cons: =0 , 1   ; attrs: type , arch  ]
>       [ w        , Y   ; neon_move   , simd  ] movi\t%0.2s, #0
>       [ w        , ?rY ; f_mcr       , *     ] fmov\t%s0, %w1
> @@ -1687,8 +1705,7 @@ (define_insn "*mov<mode>_aarch64"
>  (define_insn "*mov<mode>_aarch64"
>    [(set (match_operand:DFD 0 "nonimmediate_operand")
>          (match_operand:DFD 1 "general_operand"))]
> -  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> -    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> +  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
>    {@ [ cons: =0 , 1   ; attrs: type , arch  ]
>       [ w        , Y   ; neon_move   , simd  ] movi\t%d0, #0
>       [ w        , ?rY ; f_mcr       , *     ] fmov\t%d0, %x1
> @@ -1705,27 +1722,6 @@ (define_insn "*mov<mode>_aarch64"
>  }
> )
>
> -(define_split
> -  [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
> -        (match_operand:GPF_HF 1 "const_double_operand"))]
> -  "can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   && !aarch64_float_const_zero_rtx_p (operands[1])
> -   && aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  {
> -    unsigned HOST_WIDE_INT ival;
> -    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -      FAIL;
> -
> -    rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
> -    emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
> -    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> -    DONE;
> -  }
> -)
> -
>  (define_insn "*mov<mode>_aarch64"
>    [(set (match_operand:TFD 0 "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")