https://gcc.gnu.org/g:df20aac4c6662ef58e7577501ca9bb3c8d4ae507

commit r16-2317-gdf20aac4c6662ef58e7577501ca9bb3c8d4ae507
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Thu Jul 17 12:44:06 2025 +0200

    s390: Adapt GPR<->VR costs

    Moving between GPRs and VRs in any mode with size less than or equal to
    8 bytes becomes available with vector extensions. Without adapting
    costs for those loads, we typically go over memory.

    gcc/ChangeLog:

            * config/s390/s390.cc (s390_register_move_cost): Add costing for
            vlvg/vlgv.

Diff:
---
 gcc/config/s390/s390.cc | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 737b176766a2..b5e636c53767 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3862,7 +3862,21 @@ s390_register_move_cost (machine_mode mode,
     {
       /* On s390, copy between fprs and gprs is expensive. */
 
-      /* It becomes somewhat faster having ldgr/lgdr. */
+      /* With vector extensions any GPR<->VR load up to 8 bytes is supported. */
+      if (TARGET_VX && GET_MODE_SIZE (mode) <= 8)
+        {
+          /* ldgr/vlvgg take one cycle and vlvg[bhf] take two cycles. */
+          if (reg_classes_intersect_p (from, GENERAL_REGS)
+              && reg_classes_intersect_p (to, VEC_REGS))
+            return GET_MODE_SIZE (mode) == 8 ? 1 : 2;
+          /* lgdr/vlgv[fg] take three cycles and vlgv[bh] take five cycles. */
+          if (reg_classes_intersect_p (to, GENERAL_REGS)
+              && reg_classes_intersect_p (from, VEC_REGS))
+            return GET_MODE_SIZE (mode) >= 4 ? 3 : 4;
+        }
+
+      /* Without vector extensions it still becomes somewhat faster having
+         ldgr/lgdr. */
       if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
         {
           /* ldgr is single cycle. */