Hi Richard,

> The idea was that, if we did the split during expand, the movsf/df
> define_insns would then only accept the immediates that their
> constraints can handle.

Right, always disallowing these immediates works fine too (it seems
reload doesn't require all immediates to be valid), and then the split is
redundant. I've updated the patch:


v2: split during expand, remove movsf/df splitter

The IRA combine_and_move pass runs if the scheduler is disabled and aggressively
combines moves.  The movsf/df patterns allow all FP immediates since they rely
on a split pattern.  However splits do not happen during IRA, so the result is
extra literal loads.  To avoid this, split early during expand and block
creation of FP immediates that need this split.

double f(void) { return 128.0; }

-O2 -fno-schedule-insns gives:

        adrp    x0, .LC0
        ldr     d0, [x0, #:lo12:.LC0]
        ret

After patch:

        mov     x0, 4638707616191610880
        fmov    d0, x0
        ret

Passes bootstrap & regress, OK for commit?

gcc/ChangeLog:
        * config/aarch64/aarch64.md (mov<mode> for HFBF, SFD, DFD): Split
        FP immediates that cannot be created directly during expand.
        (movhf_aarch64): Use aarch64_valid_fp_move.
        (movsf_aarch64): Likewise.
        (movdf_aarch64): Likewise.
        Remove GPF_HF const_double define_split.
        * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function.
        * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
05d3258abf7b43342d9058dd1b365a1a0870cdc2..6da81556110c978a9de6f6fad5775c9d77771b10
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -843,6 +843,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode);
 opt_machine_mode aarch64_vq_mode (scalar_mode);
 opt_machine_mode aarch64_full_sve_mode (scalar_mode);
 bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
 bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
                                            HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
00bcf18ae97cea94227c00798b7951daa255d360..ec2d391d1e3eb9bd28f66fb6ee85311b4ced4c94
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11175,6 +11175,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_mov_imm (v_op);
 }
 
+/* Return TRUE if moving SRC to DST in mode MODE is a valid FP move.  */
+bool
+aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
+{
+  if (!TARGET_FLOAT)
+    return false;
+
+  if (aarch64_reg_or_fp_zero (src, mode))
+    return true;
+
+  if (!register_operand (dst, mode))
+    return false;
+
+  if (MEM_P (src))
+    return true;
+
+  if (!DECIMAL_FLOAT_MODE_P (mode))
+    {
+      if (aarch64_can_const_movi_rtx_p (src, mode)
+         || aarch64_float_const_representable_p (src)
+         || aarch64_float_const_zero_rtx_p (src))
+       return true;
+
+      /* Block FP immediates which are split during expand.  */
+      if (aarch64_float_const_rtx_p (src))
+       return false;
+    }
+
+  return can_create_pseudo_p ();
+}
 
 /* Return the fixed registers used for condition codes.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
8d10197c9e8dd66b7a30a1034b629297b9992661..b865ae2ff5e23edc4d8990e1efd4a51dd195f41f
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1636,14 +1636,33 @@ (define_expand "mov<mode>"
         && ! (GET_CODE (operands[1]) == CONST_DOUBLE
              && aarch64_float_const_zero_rtx_p (operands[1])))
       operands[1] = force_reg (<MODE>mode, operands[1]);
+
+    if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
+       && GET_CODE (operands[1]) == CONST_DOUBLE
+       && can_create_pseudo_p ()
+       && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+       && !aarch64_float_const_representable_p (operands[1])
+       && !aarch64_float_const_zero_rtx_p (operands[1])
+       &&  aarch64_float_const_rtx_p (operands[1]))
+      {
+       unsigned HOST_WIDE_INT ival;
+       bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
+       gcc_assert (res);
+
+       machine_mode intmode
+         = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
+       rtx tmp = gen_reg_rtx (intmode);
+       emit_move_insn (tmp, gen_int_mode (ival, intmode));
+       emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+       DONE;
+      }
   }
 )
 
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:HFBF 0 "nonimmediate_operand")
        (match_operand:HFBF 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%0.4h, #0
      [ w        , ?rY ; f_mcr       , fp16  ] fmov\t%h0, %w1
@@ -1666,8 +1685,7 @@ (define_insn "*mov<mode>_aarch64"
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:SFD 0 "nonimmediate_operand")
        (match_operand:SFD 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%0.2s, #0
      [ w        , ?rY ; f_mcr       , *     ] fmov\t%s0, %w1
@@ -1687,8 +1705,7 @@ (define_insn "*mov<mode>_aarch64"
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:DFD 0 "nonimmediate_operand")
        (match_operand:DFD 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%d0, #0
      [ w        , ?rY ; f_mcr       , *     ] fmov\t%d0, %x1
@@ -1705,27 +1722,6 @@ (define_insn "*mov<mode>_aarch64"
   }
 )
 
-(define_split
-  [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
-       (match_operand:GPF_HF 1 "const_double_operand"))]
-  "can_create_pseudo_p ()
-   && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
-   && !aarch64_float_const_representable_p (operands[1])
-   && !aarch64_float_const_zero_rtx_p (operands[1])
-   &&  aarch64_float_const_rtx_p (operands[1])"
-  [(const_int 0)]
-  {
-    unsigned HOST_WIDE_INT ival;
-    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
-      FAIL;
-
-    rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
-    emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
-    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
-    DONE;
-  }
-)
-
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:TFD 0
         "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")

Reply via email to