To place a single-precision FP constant in a hardware FP register, we would
like the compiler to emit the following sequence:

- Load the bit-exact integer image of the pooled single-precision FP
  constant into an address (integer) register
- Then, assign from that address register to a hardware single-precision
  FP register

        .literal_position
        .literal        .LC1, 0x3f800000
...
        l32r    a9, .LC1
        wfr     f0, a9

However, it was emitted as follows:

- Load the address of the FP constant entry in litpool into an address
  register
- Then, dereference the address via that address register into a hardware
  single-precision FP register

        .literal_position
        .literal        .LC1, 0x3f800000
        .literal        .LC2, .LC1
...
        l32r    a9, .LC2
        lsi     f0, a9, 0

This is obviously inefficient: the literal pool is read twice (once for the
address of the constant, then again for the constant itself).

gcc/ChangeLog:

        * config/xtensa/xtensa.md (movsf_internal):
        Reorder alternative that corresponds to L32R machine instruction,
        and prefix alternatives that correspond to LSI/SSI instructions
        with the constraint character '^' so that they are disparaged by
        reload/LRA.
From a552e4fca21ff9a0c7a5327dd15ccdada36930c1 Mon Sep 17 00:00:00 2001
From: Takayuki 'January June' Suwa <jjsuwa_sys3...@yahoo.co.jp>
Date: Tue, 23 Jul 2024 16:03:12 +0900
Subject: [PATCH 1/2] xtensa: Fix suboptimal loading of pooled constant value
 into hardware single-precision FP register

To place a single-precision FP constant in a hardware FP register, we would
like the compiler to emit the following sequence:

- Load the bit-exact integer image of the pooled single-precision FP
  constant into an address (integer) register
- Then, assign from that address register to a hardware single-precision
  FP register

        .literal_position
        .literal        .LC1, 0x3f800000
...
        l32r    a9, .LC1
        wfr     f0, a9

However, it was emitted as follows:

- Load the address of the FP constant entry in litpool into an address
  register
- Then, dereference the address via that address register into a hardware
  single-precision FP register

        .literal_position
        .literal        .LC1, 0x3f800000
        .literal        .LC2, .LC1
...
        l32r    a9, .LC2
        lsi     f0, a9, 0

This is obviously inefficient: the literal pool is read twice (once for the
address of the constant, then again for the constant itself).

gcc/ChangeLog:

        * config/xtensa/xtensa.md (movsf_internal):
        Reorder alternative that corresponds to L32R machine instruction,
        and prefix alternatives that correspond to LSI/SSI instructions
        with the constraint character '^' so that they are disparaged by
        reload/LRA.
---
 gcc/config/xtensa/xtensa.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index a3b99dc381d..f19e1fd16b5 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1453,8 +1453,8 @@
 })
 
 (define_insn "movsf_internal"
-  [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,a,W,a,a,U")
-       (match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,Y,iF,T,U,r"))]
+  [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,^U,D,a,D,R,a,f,a,a,W,a,U")
+       (match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))]
   "((register_operand (operands[0], SFmode)
      || register_operand (operands[1], SFmode))
     && !(FP_REG_P (xt_true_regnum (operands[0]))
@@ -1464,6 +1464,7 @@
    %v1lsi\t%0, %1
    %v0ssi\t%1, %0
    mov.n\t%0, %1
+   %v1l32r\t%0, %1
    %v1l32i.n\t%0, %1
    %v0s32i.n\t%1, %0
    mov\t%0, %1
@@ -1471,12 +1472,11 @@
    rfr\t%0, %1
    movi\t%0, %y1
    const16\t%0, %t1\;const16\t%0, %b1
-   %v1l32r\t%0, %1
    %v1l32i\t%0, %1
    %v0s32i\t%1, %0"
-  [(set_attr "type"    "farith,fload,fstore,move,load,store,move,farith,farith,move,move,load,load,store")
+  [(set_attr "type"    "farith,fload,fstore,move,load,load,store,move,farith,farith,move,move,load,store")
    (set_attr "mode"    "SF")
-   (set_attr "length"  "3,3,3,2,2,2,3,3,3,3,6,3,3,3")])
+   (set_attr "length"  "3,3,3,2,3,2,2,3,3,3,3,6,3,3")])
 
 (define_insn "*lsiu"
   [(set (match_operand:SF 0 "register_operand" "=f")
-- 
2.39.2

Reply via email to