Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> Hi Richard,
>
>> ...I still think we should avoid testing can_create_pseudo_p.
>> Does it work with the last part replaced by:
>>
>>   if (!DECIMAL_FLOAT_MODE_P (mode))
>>     {
>>       if (aarch64_can_const_movi_rtx_p (src, mode)
>> 	  || aarch64_float_const_representable_p (src)
>> 	  || aarch64_float_const_zero_rtx_p (src))
>> 	return true;
>>     }
>>
>>   return false;
>>
>> ?  If so, the patch is OK with that change from my POV, but please wait
>> till Thursday morning for others' opinions.
>
> With that every FP literal load causes an internal error, so they must be
> allowed.
>
> I could change the can_create_pseudo_p to true.  This generates identical
> code, but it allows literal loads to be created after regalloc (which
> should ultimately crash, as there is no valid alternative).
Sorry for dropping the ball on this.  It was still on the list of TODOs
for before the end of stage 3, but time got the better of me...

I see you've now pushed the patch.  But I still think the
can_create_pseudo_p test is wrong.  Earlier I said:

  The idea was that, if we did the split during expand, the movsf/df
  define_insns would then only accept the immediates that their
  constraints can handle.

The patch below is what I meant.  It passes bootstrap & regression-test
on aarch64-linux-gnu (and so produces the same results for the tests
that you changed).  Do you see any problems with this version?  If not,
I think we should go with it.

Thanks,
Richard


This patch reverts the changes to the define_insns in
g:45d306a835cb3f865a897dc7c04efbe1f9f46c28.  Instead it forces invalid
and unsplittable constants to memory during expansion and tightens the
predicates to prevent multi-insn constants from being accepted later.

There are three reasons for this (a rough example of the resulting
cases follows the ChangeLog below):

(1) The previous approach made the define_insns somewhat conditional on
    can_create_pseudo_p.  That seems like an ICE trap, since it means
    that existing, already-recognised insns can go from being valid to
    invalid.

(2) Exposing the memory reference earlier should give the RTL optimisers
    a better picture of what's going on, rather than leaving the RA to
    turn an immediate move into a load.

(3) It seems unlikely that RTL optimisations would be able to expose more
    FP constants than gimple can, so there shouldn't be much benefit to
    gradual lowering.

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Delete.
	(aarch64_mov_fp_immediate_p): Declare.
	* config/aarch64/aarch64.cc (aarch64_valid_fp_move): Delete.
	(aarch64_mov_fp_immediate_p): New function.
	* config/aarch64/predicates.md (aarch64_mov_operand): Use
	multi-operand ior.
	(aarch64_mov_fp_operand): New predicate.
	* config/aarch64/aarch64.md (mov<mode>): Use it when testing
	whether to expand an FP move as an integer move.  Fall back to
	forcing illegitimate FP immediates into memory.
	(*mov<mode>_aarch64): Restore the original C++ guards for the
	FP move patterns and use aarch64_mov_fp_operand as the source
	predicate.
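As a rough illustration of the intended behaviour (a hypothetical
example, not taken from the patch or the testsuite; the function names
are made up and the exact code depends on target options and costs),
FP constants now fall into three classes at expand time:

  /* 1.0f is representable as an 8-bit FP immediate, so
     aarch64_mov_fp_immediate_p accepts it and the move stays a
     single fmov.  */
  float
  fp_immediate (void)
  {
    return 1.0f;	/* fmov	s0, 1.0e+0 */
  }

  /* 0.1f is not a valid fmov or movi immediate, but its bit pattern
     (0x3dcccccd) is cheap to build in a GPR, so the expander splits
     it into an integer immediate move followed by a GPR->FPR fmov.  */
  float
  split_via_gpr (void)
  {
    return 0.1f;	/* mov/movk w0, ...; fmov s0, w0 */
  }

  /* 0.1 (double) is neither a valid FP immediate nor cheap to build
     as an integer, so the expander now forces it into the constant
     pool and emits the literal load itself, instead of leaving the
     RA to do it.  */
  double
  literal_load (void)
  {
    return 0.1;		/* adrp/ldr from the constant pool */
  }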
---
 gcc/config/aarch64/aarch64-protos.h |  2 +-
 gcc/config/aarch64/aarch64.cc       | 40 ++++++----------
 gcc/config/aarch64/aarch64.md       | 52 +++++++++++++++++------------
 gcc/config/aarch64/predicates.md    | 10 ++++--
 4 files changed, 49 insertions(+), 55 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index fa7bc8029be..18b89b805b1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -857,7 +857,6 @@ opt_machine_mode aarch64_v64_mode (scalar_mode);
 opt_machine_mode aarch64_v128_mode (scalar_mode);
 opt_machine_mode aarch64_full_sve_mode (scalar_mode);
 bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
-bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
 bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
					     HOST_WIDE_INT);
@@ -916,6 +915,7 @@ char *aarch64_output_rdsvl (const_rtx);
 bool aarch64_addsvl_addspl_immediate_p (const_rtx);
 char *aarch64_output_addsvl_addspl (rtx);
 bool aarch64_mov_operand_p (rtx, machine_mode);
+bool aarch64_mov_fp_immediate_p (rtx);
 rtx aarch64_reverse_mask (machine_mode, unsigned int);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
 bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 78d2cc4bbe4..ee304316072 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11309,36 +11309,6 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_mov_imm (v_op);
 }
 
-/* Return TRUE if DST and SRC with mode MODE is a valid fp move.  */
-bool
-aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
-{
-  if (!TARGET_FLOAT)
-    return false;
-
-  if (aarch64_reg_or_fp_zero (src, mode))
-    return true;
-
-  if (!register_operand (dst, mode))
-    return false;
-
-  if (MEM_P (src))
-    return true;
-
-  if (!DECIMAL_FLOAT_MODE_P (mode))
-    {
-      if (aarch64_can_const_movi_rtx_p (src, mode)
-	  || aarch64_float_const_representable_p (src)
-	  || aarch64_float_const_zero_rtx_p (src))
-	return true;
-
-      /* Block FP immediates which are split during expand.  */
-      if (aarch64_float_const_rtx_p (src))
-	return false;
-    }
-
-  return can_create_pseudo_p ();
-}
-
 /* Return the fixed registers used for condition codes.  */
@@ -23722,6 +23692,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
 	  == SYMBOL_TINY_ABSOLUTE;
 }
 
+/* Return true if X is a suitable constant for a single FP move.  */
+
+bool
+aarch64_mov_fp_immediate_p (rtx x)
+{
+  return (aarch64_can_const_movi_rtx_p (x, GET_MODE (x))
+	  || aarch64_float_const_representable_p (x)
+	  || aarch64_float_const_zero_rtx_p (x));
+}
+
 /* Return a function-invariant register that contains VALUE.  *CACHED_INSN
    caches instructions that set up such registers, so that they can be
    reused by future calls.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0ed3c93b379..c59da6a694a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1761,32 +1761,36 @@ (define_expand "mov<mode>"
 	    && aarch64_float_const_zero_rtx_p (operands[1])))
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
-	&& GET_CODE (operands[1]) == CONST_DOUBLE
-	&& can_create_pseudo_p ()
-	&& !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
-	&& !aarch64_float_const_representable_p (operands[1])
-	&& !aarch64_float_const_zero_rtx_p (operands[1])
-	&& aarch64_float_const_rtx_p (operands[1]))
+    if (GET_CODE (operands[1]) == CONST_DOUBLE
+	&& !aarch64_mov_fp_immediate_p (operands[1]))
       {
-	unsigned HOST_WIDE_INT ival;
-	bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
-	gcc_assert (res);
+	if (can_create_pseudo_p ()
+	    && aarch64_float_const_rtx_p (operands[1]))
+	  {
+	    unsigned HOST_WIDE_INT ival;
+	    bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
+	    gcc_assert (res);
+
+	    auto bitsize = GET_MODE_BITSIZE (<MODE>mode);
+	    auto intmode = int_mode_for_size (bitsize, 0).require ();
+	    rtx tmp = gen_reg_rtx (intmode);
+	    emit_move_insn (tmp, gen_int_mode (ival, intmode));
+	    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+	    DONE;
+	  }
 
-	machine_mode intmode
-	  = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
-	rtx tmp = gen_reg_rtx (intmode);
-	emit_move_insn (tmp, gen_int_mode (ival, intmode));
-	emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
-	DONE;
+	rtx mem = force_const_mem (<MODE>mode, operands[1]);
+	operands[1] = validize_mem (mem);
       }
   }
 )
 
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:HFBF 0 "nonimmediate_operand")
-	(match_operand:HFBF 1 "general_operand"))]
-  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
+	(match_operand:HFBF 1 "aarch64_mov_fp_operand"))]
+  "TARGET_FLOAT
+   && (register_operand (operands[0], <MODE>mode)
+       || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1   ; attrs: type , arch ]
      [ w        , Y   ; neon_move   , simd ] movi\t%0.4h, #0
      [ w        , ?rY ; f_mcr       , fp16 ] fmov\t%h0, %w1
@@ -1808,8 +1812,10 @@ (define_insn "*mov<mode>_aarch64"
 
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:SFD 0 "nonimmediate_operand")
-	(match_operand:SFD 1 "general_operand"))]
-  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
+	(match_operand:SFD 1 "aarch64_mov_fp_operand"))]
+  "TARGET_FLOAT
+   && (register_operand (operands[0], <MODE>mode)
+       || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1   ; attrs: type , arch ]
      [ w        , Y   ; neon_move   , simd ] movi\t%0.2s, #0
      [ w        , ?rY ; f_mcr       , *    ] fmov\t%s0, %w1
@@ -1828,8 +1834,10 @@ (define_insn "*mov<mode>_aarch64"
 
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:DFD 0 "nonimmediate_operand")
-	(match_operand:DFD 1 "general_operand"))]
-  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
+	(match_operand:DFD 1 "aarch64_mov_fp_operand"))]
+  "TARGET_FLOAT
+   && (register_operand (operands[0], <MODE>mode)
+       || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1   ; attrs: type , arch ]
      [ w        , Y   ; neon_move   , simd ] movi\t%d0, #0
      [ w        , ?rY ; f_mcr       , *    ] fmov\t%d0, %x1
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1ab1c696c62..b7886e38023 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -410,8 +410,14 @@ (define_predicate "aarch64_mov_operand"
(and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high, const_poly_int,const_vector") (ior (match_operand 0 "register_operand") - (ior (match_operand 0 "memory_operand") - (match_test "aarch64_mov_operand_p (op, mode)"))))) + (match_operand 0 "memory_operand") + (match_test "aarch64_mov_operand_p (op, mode)")))) + +(define_predicate "aarch64_mov_fp_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "memory_operand") + (and (match_code "const_double") + (match_test "aarch64_mov_fp_immediate_p (op)")))) (define_predicate "aarch64_nonmemory_operand" (and (match_code "reg,subreg,const,const_int,symbol_ref,label_ref,high, -- 2.25.1