Hi Richard, > The idea was that, if we did the split during expand, the movsf/df > define_insns would then only accept the immediates that their > constraints can handle.
Right, always disallowing these immediates works fine too (it seems reload doesn't require all immediates to be valid), and then the split is redundant. I've updated the patch: v2: split during expand, remove movsf/df splitter The IRA combine_and_move pass runs if the scheduler is disabled and aggressively combines moves. The movsf/df patterns allow all FP immediates since they rely on a split pattern. However splits do not happen during IRA, so the result is extra literal loads. To avoid this, split early during expand and block creation of FP immediates that need this split. double f(void) { return 128.0; } -O2 -fno-schedule-insns gives: adrp x0, .LC0 ldr d0, [x0, #:lo12:.LC0] ret After patch: mov x0, 4638707616191610880 fmov d0, x0 ret Passes bootstrap & regress, OK for commit? gcc/ChangeLog: * config/aarch64/aarch64.md (movhf_aarch64): Use aarch64_valid_fp_move. (movsf_aarch64): Likewise. (movdf_aarch64): Likewise. * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function. * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise. --- diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 05d3258abf7b43342d9058dd1b365a1a0870cdc2..6da81556110c978a9de6f6fad5775c9d77771b10 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -843,6 +843,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode); opt_machine_mode aarch64_vq_mode (scalar_mode); opt_machine_mode aarch64_full_sve_mode (scalar_mode); bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); +bool aarch64_valid_fp_move (rtx, rtx, machine_mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 00bcf18ae97cea94227c00798b7951daa255d360..ec2d391d1e3eb9bd28f66fb6ee85311b4ced4c94 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -11175,6 +11175,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) return aarch64_simd_valid_mov_imm (v_op); } +/* Return TRUE if DST and SRC with mode MODE is a valid fp move. */ +bool +aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode) +{ + if (!TARGET_FLOAT) + return false; + + if (aarch64_reg_or_fp_zero (src, mode)) + return true; + + if (!register_operand (dst, mode)) + return false; + + if (MEM_P (src)) + return true; + + if (!DECIMAL_FLOAT_MODE_P (mode)) + { + if (aarch64_can_const_movi_rtx_p (src, mode) + || aarch64_float_const_representable_p (src) + || aarch64_float_const_zero_rtx_p (src)) + return true; + + /* Block FP immediates which are split during expand. */ + if (aarch64_float_const_rtx_p (src)) + return false; + } + + return can_create_pseudo_p (); +} /* Return the fixed registers used for condition codes. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 8d10197c9e8dd66b7a30a1034b629297b9992661..b865ae2ff5e23edc4d8990e1efd4a51dd195f41f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1636,14 +1636,33 @@ (define_expand "mov<mode>" && ! (GET_CODE (operands[1]) == CONST_DOUBLE && aarch64_float_const_zero_rtx_p (operands[1]))) operands[1] = force_reg (<MODE>mode, operands[1]); + + if (!DECIMAL_FLOAT_MODE_P (<MODE>mode) + && GET_CODE (operands[1]) == CONST_DOUBLE + && can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode) + && !aarch64_float_const_representable_p (operands[1]) + && !aarch64_float_const_zero_rtx_p (operands[1]) + && aarch64_float_const_rtx_p (operands[1])) + { + unsigned HOST_WIDE_INT ival; + bool res = aarch64_reinterpret_float_as_int (operands[1], &ival); + gcc_assert (res); + + machine_mode intmode + = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require (); + rtx tmp = gen_reg_rtx (intmode); + emit_move_insn (tmp, gen_int_mode (ival, intmode)); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); + DONE; + } } ) (define_insn "*mov<mode>_aarch64" [(set (match_operand:HFBF 0 "nonimmediate_operand") (match_operand:HFBF 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%0.4h, #0 [ w , ?rY ; f_mcr , fp16 ] fmov\t%h0, %w1 @@ -1666,8 +1685,7 @@ (define_insn "*mov<mode>_aarch64" (define_insn "*mov<mode>_aarch64" [(set (match_operand:SFD 0 "nonimmediate_operand") (match_operand:SFD 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%0.2s, #0 [ w , ?rY ; f_mcr , * ] fmov\t%s0, %w1 @@ -1687,8 +1705,7 @@ (define_insn "*mov<mode>_aarch64" (define_insn "*mov<mode>_aarch64" [(set (match_operand:DFD 0 "nonimmediate_operand") (match_operand:DFD 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%d0, #0 [ w , ?rY ; f_mcr , * ] fmov\t%d0, %x1 @@ -1705,27 +1722,6 @@ (define_insn "*mov<mode>_aarch64" } ) -(define_split - [(set (match_operand:GPF_HF 0 "nonimmediate_operand") - (match_operand:GPF_HF 1 "const_double_operand"))] - "can_create_pseudo_p () - && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode) - && !aarch64_float_const_representable_p (operands[1]) - && !aarch64_float_const_zero_rtx_p (operands[1]) - && aarch64_float_const_rtx_p (operands[1])" - [(const_int 0)] - { - unsigned HOST_WIDE_INT ival; - if (!aarch64_reinterpret_float_as_int (operands[1], &ival)) - FAIL; - - rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode); - emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode)); - emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); - DONE; - } -) - (define_insn "*mov<mode>_aarch64" [(set (match_operand:TFD 0 "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")