https://gcc.gnu.org/g:cb0d29db88e367a709f32e49bfa540e54db02f82
commit r16-7122-gcb0d29db88e367a709f32e49bfa540e54db02f82
Author: Robin Dapp <[email protected]>
Date:   Mon Jan 26 17:59:58 2026 +0100

    RISC-V: Fix ABI vector passing on stack and GPR [PR123824].

    Krister reported that we violate the psABI when one vector argument
    halfway fits into a register:

      "Aggregates whose total size is no more than 2×XLEN bits are passed in
       a pair of registers; if only one register is available, the first XLEN
       bits are passed in a register and the remaining bits are passed on the
       stack. If no registers are available, the aggregate is passed on the
       stack."

    This patch fixes this oversight and adds a few tests.

    Regtested on rv64gcv_zvl512b.

    PR target/123824

    gcc/ChangeLog:

    * config/riscv/riscv.cc (riscv_vls_mode_fits_in_gprs_p): New helper.
    (riscv_pass_vls_aggregate_in_gpr): Use helper and distribute
    half-fitting vector to GPR and stack.
    (riscv_pass_aggregate_in_vr): Reformat comment.
    (riscv_get_arg_info): Use helper.
    (riscv_pass_by_reference): Ditto.

    gcc/testsuite/ChangeLog:

    * gcc.target/riscv/abi/vls-gpr-1.c: New test.
    * gcc.target/riscv/abi/vls-gpr-10.c: New test.
    * gcc.target/riscv/abi/vls-gpr-11.c: New test.
    * gcc.target/riscv/abi/vls-gpr-12.c: New test.
    * gcc.target/riscv/abi/vls-gpr-13.c: New test.
    * gcc.target/riscv/abi/vls-gpr-14.c: New test.
    * gcc.target/riscv/abi/vls-gpr-2.c: New test.
    * gcc.target/riscv/abi/vls-gpr-3.c: New test.
    * gcc.target/riscv/abi/vls-gpr-4.c: New test.
    * gcc.target/riscv/abi/vls-gpr-5.c: New test.
    * gcc.target/riscv/abi/vls-gpr-6.c: New test.
    * gcc.target/riscv/abi/vls-gpr-7.c: New test.
    * gcc.target/riscv/abi/vls-gpr-8.c: New test.
    * gcc.target/riscv/abi/vls-gpr-9.c: New test.
Diff: --- gcc/config/riscv/riscv.cc | 75 +++++++++++++++++-------- gcc/testsuite/gcc.target/riscv/abi/vls-gpr-1.c | 15 +++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-10.c | 17 ++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-11.c | 15 +++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-12.c | 19 +++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-13.c | 19 +++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-14.c | 12 ++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-2.c | 15 +++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-3.c | 16 ++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-4.c | 19 +++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-5.c | 21 +++++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-6.c | 12 ++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-7.c | 14 +++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-8.c | 15 +++++ gcc/testsuite/gcc.target/riscv/abi/vls-gpr-9.c | 17 ++++++ 15 files changed, 279 insertions(+), 22 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 9f7109bc25a4..32e72128f049 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -6448,6 +6448,17 @@ riscv_pass_fpr_pair (machine_mode mode, unsigned regno1, GEN_INT (offset2)))); } +/* Return true if VLS mode MODE fits in general purpose registers per the + psABI. The psABI allows aggregates up to 2 * XLEN bits to be passed in + GPRs. 
*/ + +static bool +riscv_vls_mode_fits_in_gprs_p (machine_mode mode) +{ + return riscv_vls_mode_p (mode) + && known_le (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD); +} + static rtx riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, unsigned gpr_base) @@ -6460,16 +6471,29 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, unsigned vls_size = GET_MODE_SIZE (mode).to_constant (); unsigned gpr_size = GET_MODE_SIZE (Xmode); - if (IN_RANGE (vls_size, 0, gpr_size * 2)) + if (riscv_vls_mode_fits_in_gprs_p (mode)) { count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size); + unsigned gprs_left = MAX_ARGS_IN_REGISTERS - info->gpr_offset; - if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS) + if (count <= gprs_left) { + /* Entire VLS fits in remaining GPRs. */ regnum = gpr_base + info->gpr_offset; info->num_gprs = count; gpr_mode = riscv_v_vls_to_gpr_mode (vls_size); } + else if (gprs_left > 0) + { + /* Per the psABI, split between GPRs and stack: + "if only one register is available, the first XLEN bits are + passed in a register and the remaining bits are passed on + the stack." */ + regnum = gpr_base + info->gpr_offset; + info->num_gprs = gprs_left; + info->stack_p = true; + gpr_mode = Xmode; + } } if (!regnum) @@ -6816,11 +6840,12 @@ riscv_pass_aggregate_in_vr (struct riscv_arg_info *info, /* Fill INFO with information about a single argument, and return an RTL pattern to pass or return the argument. Return NULL_RTX if argument cannot - pass or return in registers, then the argument may be passed by reference or - through the stack or . CUM is the cumulative state for earlier arguments. - MODE is the mode of this argument and TYPE is its type (if known). NAMED is - true if this is a named (fixed) argument rather than a variable one. RETURN_P - is true if returning the argument, or false if passing the argument. 
*/ + pass or return in registers, then the argument may be passed by reference + or through the stack. CUM is the cumulative state for earlier arguments. + MODE is the mode of this argument and TYPE is its type (if known). + NAMED is true if this is a named (fixed) argument rather than a variable + one. RETURN_P is true if returning the argument, or false if passing + the argument. */ static rtx riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, @@ -6836,8 +6861,10 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, info->gpr_offset = cum->num_gprs; info->fpr_offset = cum->num_fprs; - /* Passed by reference when the scalable vector argument is anonymous. */ - if (riscv_vector_mode_p (mode) && !named) + /* Passed by reference when the scalable vector argument is anonymous. + VLS modes <= 2*XLEN follow regular aggregate rules per the psABI. */ + if (riscv_vector_mode_p (mode) && !named + && !riscv_vls_mode_fits_in_gprs_p (mode)) return NULL_RTX; if (named) @@ -6929,13 +6956,19 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, if (!named && num_bytes != 0 && alignment > BITS_PER_WORD) info->gpr_offset += info->gpr_offset & 1; - /* Partition the argument between registers and stack. */ - info->num_fprs = 0; - info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset); - info->stack_p = (num_words - info->num_gprs) != 0; + if (riscv_vls_mode_p (mode)) + return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base); + else + { + /* Partition the argument between registers and stack. 
*/ + info->num_fprs = 0; + info->num_gprs + = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset); + info->stack_p = (num_words - info->num_gprs) != 0; - if (info->num_gprs || return_p) - return gen_rtx_REG (mode, gpr_base + info->gpr_offset); + if (info->num_gprs || return_p) + return gen_rtx_REG (mode, gpr_base + info->gpr_offset); + } return NULL_RTX; } @@ -7065,19 +7098,17 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) if (info.num_fprs) return false; - /* Don't pass by reference if we can use general register(s) for vls. */ - if (info.num_gprs && riscv_vls_mode_p (arg.mode)) - return false; - /* Don't pass by reference if we can use vector register groups. */ if (info.num_vrs > 0 || info.num_mrs > 0) return false; } /* Passed by reference when: - 1. The scalable vector argument is anonymous. - 2. Args cannot be passed through vector registers. */ - if (riscv_vector_mode_p (arg.mode)) + (1) The scalable vector argument is anonymous. + (2) Args cannot be passed through vector registers. + VLS modes <= 2*XLEN follow regular aggregate rules per the psABI. */ + if (riscv_vector_mode_p (arg.mode) + && !riscv_vls_mode_fits_in_gprs_p (arg.mode)) return true; /* Pass by reference if the data do not fit in two integer registers. */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-1.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-1.c new file mode 100644 index 000000000000..9d663acfc71c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that small VLS types (<= 2*XLEN = 128 bits) are passed in GPRs + per the psABI, not by reference. */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si test_vls_in_gpr (int a0, int a1, v2si a2) +{ + return a2; +} + +/* The 8-byte VLS vector should be passed in a2 and returned in a0. 
*/ +/* { dg-final { scan-assembler-times "mv\ta0,a2" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-10.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-10.c new file mode 100644 index 000000000000..42a18f4f3337 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-10.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2" } */ + +/* Test that VLS types > 2*XLEN (> 64 bits for rv32) are passed + by reference, not in GPRs. */ + +typedef int __attribute__((vector_size(16))) v4si; + +v4si test_vls_by_reference_rv32 (int a0, v4si a1) +{ + return a1; +} + +/* The 16-byte VLS vector should be passed by reference on rv32 + (since 16 bytes > 2*4 = 8 bytes). */ +/* { dg-final { scan-assembler "vle32.v\tv\[0-9\]+,0\\(a2\\)" } } */ +/* { dg-final { scan-assembler "vse32.v\tv\[0-9\]+,0\\(a0\\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-11.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-11.c new file mode 100644 index 000000000000..741b0b838349 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-11.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test return value: small VLS types should be returned in GPRs. */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si make_vls (void) +{ + v2si v = {1, 2}; + return v; +} + +/* The return value should use a0. */ +/* { dg-final { scan-assembler "ld\ta0," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-12.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-12.c new file mode 100644 index 000000000000..da068821e55d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-12.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2" } */ + +/* Test that 8-byte VLS is split between GPR and stack when only + one GPR is available on rv32. 
Per the psABI: "if only one register + is available, the first XLEN bits are passed in a register and the + remaining bits are passed on the stack." */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si test_vls_gpr_stack_split_rv32 (int a0, int a1, int a2, int a3, + int a4, int a5, int a6, v2si a7) +{ + return a7; +} + +/* a0-a6 use 7 GPRs, leaving only a7. The 8-byte VLS should be + split: first 4 bytes in a7, remaining 4 bytes on stack. */ +/* { dg-final { scan-assembler "sw\ta7," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-13.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-13.c new file mode 100644 index 000000000000..8bc1c58dfb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2" } */ + +/* Test stack split with two 8-byte VLS arguments on rv32. + First VLS splits between a7 and stack, second is fully on stack. */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si test_vls_gpr_stack_split2_rv32 (int a0, int a1, int a2, int a3, + int a4, int a5, int a6, v2si a7, + v2si a8) +{ + v2si res = a7 + a8; + return res; +} + +/* a7 splits (4 bytes in a7, 4 on stack), a8 fully on stack. */ +/* { dg-final { scan-assembler "sw\ta7," } } */ +/* { dg-final { scan-assembler "lw\t\[at\]\[0-9\]+,\[0-9\]+\\(sp\\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-14.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-14.c new file mode 100644 index 000000000000..72b241670456 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-14.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2" } */ + +typedef int v2si __attribute__ ((vector_size (8))); +int test (int accumulator, int dummy, v2si v1, v2si v2, v2si v3, v2si v4) +{ + accumulator &= v4[0] & v4[1]; + return accumulator; +} + +/* v4 should be passed on the stack. 
*/ +/* { dg-final { scan-assembler "vle32.v\tv\[0-9\]+,0\\(sp\\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-2.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-2.c new file mode 100644 index 000000000000..e8e1efa6cdcb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that 16-byte VLS types (= 2*XLEN for rv64) are passed in 2 GPRs. */ + +typedef long __attribute__((vector_size(16))) v2di; + +v2di test_vls_two_gprs (int a0, v2di a1) +{ + return a1; +} + +/* The 16-byte VLS vector is passed in a1,a2 and returned in a0,a1. */ +/* { dg-final { scan-assembler "sd\ta1," } } */ +/* { dg-final { scan-assembler "sd\ta2," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-3.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-3.c new file mode 100644 index 000000000000..f28a6e33e52f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that small VLS types are passed in GPRs even when filling + the argument registers. */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si test_vls_multiple_gprs (int a0, int a1, int a2, int a3, + int a4, int a5, v2si a6) +{ + return a6; +} + +/* a0-a5 are used by ints, the 8-byte VLS should use a6. */ +/* { dg-final { scan-assembler-times "mv\ta0,a6" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-4.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-4.c new file mode 100644 index 000000000000..2595f4019571 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that 16-byte VLS is split between GPR and stack when only + one GPR is available. 
Per the psABI: "if only one register is + available, the first XLEN bits are passed in a register and the + remaining bits are passed on the stack." */ + +typedef long __attribute__((vector_size(16))) v2di; + +v2di test_vls_gpr_stack_split (int a0, int a1, int a2, int a3, + int a4, int a5, int a6, v2di a7) +{ + return a7; +} + +/* a0-a6 use 7 GPRs, leaving only a7. The 16-byte VLS should be + split: first 8 bytes in a7, remaining 8 bytes on stack. */ +/* { dg-final { scan-assembler "sd\ta7," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-5.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-5.c new file mode 100644 index 000000000000..da24e1fec61e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that 16-byte VLS is split between GPR and stack when only + one GPR is available. Per the psABI: "if only one register is + available, the first XLEN bits are passed in a register and the + remaining bits are passed on the stack." */ + +typedef long __attribute__((vector_size(16))) v2di; + +v2di test_vls_gpr_stack_split2 (int a0, int a1, int a2, int a3, + int a4, int a5, int a6, v2di a7, + v2di a8) +{ + v2di res = a7 + a8; + return res; +} + +/* a0-a6 use 7 GPRs, leaving only a7. The 16-byte VLS should be + split: first 8 bytes in a7, remaining 8 bytes on stack. 
*/ +/* { dg-final { scan-assembler "sd\ta7," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-6.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-6.c new file mode 100644 index 000000000000..3fd6d247d333 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-6.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +typedef int v4si __attribute__ ((vector_size (16))); +int test (int accumulator, int dummy, v4si v1, v4si v2, v4si v3, v4si v4) +{ + accumulator &= v4[0] & v4[1] & v4[2] & v4[3]; + return accumulator; +} + +/* v4 should be passed on the stack. */ +/* { dg-final { scan-assembler "vle32.v\tv\[0-9\]+,0\\(sp\\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-7.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-7.c new file mode 100644 index 000000000000..df20e773c211 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that 4-byte VLS types (< XLEN) are passed in a single GPR. */ + +typedef short __attribute__((vector_size(4))) v2hi; + +v2hi test_vls_small (int a0, v2hi a1) +{ + return a1; +} + +/* The 4-byte VLS vector should be passed in a1. */ +/* { dg-final { scan-assembler-times "mv\ta0,a1" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-8.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-8.c new file mode 100644 index 000000000000..ef0da0d20593 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2" } */ + +/* Test that 8-byte VLS types (= 2*XLEN for rv32) are passed in 2 GPRs. */ + +typedef int __attribute__((vector_size(8))) v2si; + +v2si test_vls_two_gprs_rv32 (int a0, v2si a1) +{ + return a1; +} + +/* The 8-byte VLS vector is passed in a1,a2 and returned in a0,a1 on rv32. 
*/ +/* { dg-final { scan-assembler "mv\ta0,a1" } } */ +/* { dg-final { scan-assembler "mv\ta1,a2" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-9.c b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-9.c new file mode 100644 index 000000000000..094071443f41 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/abi/vls-gpr-9.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ + +/* Test that VLS types > 2*XLEN (> 128 bits for rv64) are passed + by reference, not in GPRs. */ + +typedef long __attribute__((vector_size(32))) v4di; + +v4di test_vls_by_reference (int a0, v4di a1) +{ + return a1; +} + +/* The 32-byte VLS vector should be passed by reference. + Return value pointer in a0, argument pointer in a2 (a1 holds a0). */ +/* { dg-final { scan-assembler "vle64.v\tv\[0-9\]+,0\\(a2\\)" } } */ +/* { dg-final { scan-assembler "vse64.v\tv\[0-9\]+,0\\(a0\\)" } } */
