https://gcc.gnu.org/g:df20aac4c6662ef58e7577501ca9bb3c8d4ae507

commit r16-2317-gdf20aac4c6662ef58e7577501ca9bb3c8d4ae507
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Thu Jul 17 12:44:06 2025 +0200

    s390: Adapt GPR<->VR costs
    
    Moving between GPRs and VRs in any mode with a size of at most 8 bytes
    becomes possible with the vector extensions.  Without adapting the
    costs for those moves, they typically go through memory.
    
    gcc/ChangeLog:
    
            * config/s390/s390.cc (s390_register_move_cost): Add costing for
            vlvg/vlgv.

Diff:
---
 gcc/config/s390/s390.cc | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 737b176766a2..b5e636c53767 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3862,7 +3862,21 @@ s390_register_move_cost (machine_mode mode,
 {
   /* On s390, copy between fprs and gprs is expensive.  */
 
-  /* It becomes somewhat faster having ldgr/lgdr.  */
+  /* With vector extensions any GPR<->VR load up to 8 bytes is supported.  */
+  if (TARGET_VX && GET_MODE_SIZE (mode) <= 8)
+    {
+      /* ldgr/vlvgg take one cycle and vlvg[bhf] take two cycles. */
+      if (reg_classes_intersect_p (from, GENERAL_REGS)
+         && reg_classes_intersect_p (to, VEC_REGS))
+       return GET_MODE_SIZE (mode) == 8 ? 1 : 2;
+      /* lgdr/vlgv[fg] take three cycles and vlgv[bh] take five cycles. */
+      if (reg_classes_intersect_p (to, GENERAL_REGS)
+         && reg_classes_intersect_p (from, VEC_REGS))
+       return GET_MODE_SIZE (mode) >= 4 ? 3 : 4;
+    }
+
+  /* Without vector extensions it still becomes somewhat faster having
+     ldgr/lgdr.  */
   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
     {
       /* ldgr is single cycle. */

Reply via email to