The AArch64 port uses define_splits to prefer moving certain float constants via integer registers over loading them from memory. E.g.:
(set (reg:SF X) (const_double:SF C)) splits to: (set (reg:SI tmp) (const_int C')) (set (reg:SF X) (subreg:SF (reg:SI tmp) 0)) The problem with using splits for this -- especially when the split instruction is a constant move -- is that the original form is still valid and can be recreated by later pre-RA passes. (And I think that's a valid thing for them to do, since they're folding away what appears in rtl terms to be a redundant instruction.) One pass that can do this is ira's combine_and_move_insns, which among other things looks for registers that are set once and used once. If the register is set to a rematerialisable value, the code tries to fold that value into the single use. We don't normally see this effect at -O2 and above because combine_and_move_insns isn't run when -fsched-pressure is enabled (which it is by default on AArch64). But arguably the combine part is useful independently of -fsched-pressure, and only the move part is suspect. So I don't think we should rely on the combination not happening here. The new tests demonstrate the problem by running the original tests at -O instead of -O2. This patch does the optimisation by splitting the moves at generation time and rejecting the combined form while the split is still possible. REG_EQUAL notes on the second move still give the original floating-point value for passes that need it. Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf. OK to install? Richard 2019-08-07 Richard Sandiford <richard.sandif...@arm.com> gcc/ * config/aarch64/aarch64-protos.h (aarch64_move_float_via_int_p): Declare. * config/aarch64/aarch64.c (aarch64_move_float_via_int_p): New function, extracted from the GPF_HF move splitter. * config/aarch64/aarch64.md: Remove GPF_HF move splitter. (mov<GPF_TF_F16:mode>): Move via an integer register if aarch64_move_float_via_int_p. (*movhf_aarch64, *movsf_aarch64, *movdf_aarch64): Check aarch64_move_float_via_int_p. * config/aarch64/iterators.md (fcvt_target): Handle TI and TF. (FCVT_TARGET): Likewise. gcc/testsuite/ * gcc.target/aarch64/dbl_mov_immediate_2.c: New test. * gcc.target/aarch64/f16_mov_immediate_5.c: Likewise. * gcc.target/aarch64/flt_mov_immediate_2.c: Likewise. Index: gcc/config/aarch64/aarch64-protos.h =================================================================== --- gcc/config/aarch64/aarch64-protos.h 2019-07-01 09:37:06.704528805 +0100 +++ gcc/config/aarch64/aarch64-protos.h 2019-08-07 19:07:38.199739765 +0100 @@ -519,6 +519,7 @@ const char * aarch64_output_probe_stack_ const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); void aarch64_err_no_fpadvsimd (machine_mode); void aarch64_expand_epilogue (bool); +bool aarch64_move_float_via_int_p (rtx); void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2019-07-16 09:11:06.449416469 +0100 +++ gcc/config/aarch64/aarch64.c 2019-08-07 19:07:38.203739735 +0100 @@ -3278,6 +3278,22 @@ aarch64_expand_sve_const_vector (rtx des gcc_assert (vectors[0] == dest); } +/* Return true if floating-point value SRC should be moved into an + integer register first and then moved into a floating-point register. + This means that SRC is a constant that cannot be moved directly into + floating-point registers but assembling it in integer registers is + better than forcing it to memory. */ +bool +aarch64_move_float_via_int_p (rtx src) +{ + return (GET_MODE (src) != TFmode + && GET_CODE (src) == CONST_DOUBLE + && can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (src, GET_MODE (src)) + && !aarch64_float_const_representable_p (src) + && aarch64_float_const_rtx_p (src)); +} + /* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE is a pattern that can be used to set DEST to a replicated scalar element. */ Index: gcc/config/aarch64/aarch64.md =================================================================== --- gcc/config/aarch64/aarch64.md 2019-08-05 17:46:20.713723611 +0100 +++ gcc/config/aarch64/aarch64.md 2019-08-07 19:07:38.203739735 +0100 @@ -1249,14 +1249,24 @@ (define_expand "mov<mode>" && ! (GET_CODE (operands[1]) == CONST_DOUBLE && aarch64_float_const_zero_rtx_p (operands[1]))) operands[1] = force_reg (<MODE>mode, operands[1]); + + if (aarch64_move_float_via_int_p (operands[1])) + { + rtx imm = simplify_gen_subreg (<FCVT_TARGET>mode, operands[1], + <MODE>mode, 0); + rtx tmp = force_reg (<FCVT_TARGET>mode, imm); + operands[1] = gen_lowpart (<MODE>mode, tmp); + } } ) (define_insn "*movhf_aarch64" [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], HFmode) - || aarch64_reg_or_fp_zero (operands[1], HFmode))" + "TARGET_FLOAT + && (register_operand (operands[0], HFmode) + || aarch64_reg_or_fp_zero (operands[1], HFmode)) + && !aarch64_move_float_via_int_p (operands[1])" "@ movi\\t%0.4h, #0 fmov\\t%h0, %w1 @@ -1278,8 +1288,10 @@ (define_insn "*movhf_aarch64" (define_insn "*movsf_aarch64" [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))] - "TARGET_FLOAT && (register_operand (operands[0], SFmode) - || aarch64_reg_or_fp_zero (operands[1], SFmode))" + "TARGET_FLOAT + && (register_operand (operands[0], SFmode) + || aarch64_reg_or_fp_zero (operands[1], SFmode)) + && !aarch64_move_float_via_int_p (operands[1])" "@ movi\\t%0.2s, #0 fmov\\t%s0, %w1 @@ -1302,8 +1314,10 @@ (define_insn "*movsf_aarch64" (define_insn "*movdf_aarch64" [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] - "TARGET_FLOAT && (register_operand (operands[0], DFmode) - || aarch64_reg_or_fp_zero (operands[1], DFmode))" + "TARGET_FLOAT + && (register_operand (operands[0], DFmode) + || aarch64_reg_or_fp_zero (operands[1], DFmode)) + && !aarch64_move_float_via_int_p (operands[1])" "@ movi\\t%d0, #0 fmov\\t%d0, %x1 @@ -1323,26 +1337,6 @@ (define_insn "*movdf_aarch64" (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] ) -(define_split - [(set (match_operand:GPF_HF 0 "nonimmediate_operand") - (match_operand:GPF_HF 1 "general_operand"))] - "can_create_pseudo_p () - && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode) - && !aarch64_float_const_representable_p (operands[1]) - && aarch64_float_const_rtx_p (operands[1])" - [(const_int 0)] - { - unsigned HOST_WIDE_INT ival; - if (!aarch64_reinterpret_float_as_int (operands[1], &ival)) - FAIL; - - rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode); - emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode)); - emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); - DONE; - } -) - (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md 2019-07-29 09:39:46.658190164 +0100 +++ gcc/config/aarch64/iterators.md 2019-08-07 19:07:38.203739735 +0100 @@ -960,12 +960,14 @@ (define_mode_attr fcvt_target [(V2DF "v2 (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") (SF "si") (DF "di") (SI "sf") (DI "df") (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf") - (V8HI "v8hf") (HF "hi") (HI "hf")]) + (V8HI "v8hf") (HF "hi") (HI "hf") + (TI "tf") (TF "ti")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") (SF "SI") (DF "DI") (SI "SF") (DI "DF") (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF") - (V8HI "V8HF") (HF "HI") (HI "HF")]) + (V8HI "V8HF") (HF "HI") (HI "HF") + (TI "TF") (TF "TI")]) ;; for the inequal width integer to fp conversions Index: gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_2.c =================================================================== --- /dev/null 2019-07-30 08:53:31.317691683 +0100 +++ gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_2.c 2019-08-07 19:07:38.203739735 +0100 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mno-pc-relative-literal-loads" } */ +/* { dg-skip-if "Tiny model won't generate adrp" { *-*-* } { "-mcmodel=tiny" } { "" } } */ + +#include "dbl_mov_immediate_1.c" + +/* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0" 1 } } */ + +/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:\.LC\[0-9\]\\\]" 2 } } */ + +/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0" 1 } } */ + +/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 25838523252736" 1 } } */ +/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x40fe, lsl 48" 1 } } */ +/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 1 } } */ +/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+" 2 } } */ + Index: gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_5.c =================================================================== --- /dev/null 2019-07-30 08:53:31.317691683 +0100 +++ gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_5.c 2019-08-07 19:07:38.203739735 +0100 @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ +/* { dg-add-options arm_v8_2a_fp16_scalar } */ + +#include "f16_mov_immediate_2.c" + +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, ?#0" 1 } } */ +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, 0x80, lsl 8" 1 } } */ +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, 0x5c, lsl 8" 1 } } */ +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, 0x7c, lsl 8" 1 } } */ + +/* { dg-final { scan-assembler-times {fmov\th[0-9]+, #?1.7e\+1} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/flt_mov_immediate_2.c =================================================================== --- /dev/null 2019-07-30 08:53:31.317691683 +0100 +++ gcc/testsuite/gcc.target/aarch64/flt_mov_immediate_2.c 2019-08-07 19:07:38.203739735 +0100 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include "flt_mov_immediate_1.c" + +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, ?#0" 1 } } */ +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24" 1 } } */ +/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24" 1 } } */ + +/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 48128" 1 } } */ +/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0x47f0, lsl 16" 1 } } */ + +/* { dg-final { scan-assembler-times "fmov\ts\[0-9\]+, 2\\\.0e\\\+0" 1 } } */ + +/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 16435" 1 } } */ +/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0xc69c, lsl 16" 1 } } */