Bootstrapped and regtested on s390x-redhat-linux. Depends on https://gcc.gnu.org/pipermail/gcc-patches/2021-January/562898.html; ok for master once the dependency is committed?
After the s390 backend was switched to store long doubles in vector registers, the "f" constraint broke when used with long doubles: long doubles correspond to TFmode, which in combination with "f" corresponds to hard regs %v0-%v15, whereas asm users expect a %f0-%f15 pair. Fix by using the TARGET_MD_ASM_ADJUST hook to convert TFmode values to FPRX2mode and back. gcc/ChangeLog: 2020-12-14 Ilya Leoshkevich <i...@linux.ibm.com> * config/s390/s390.c (s390_md_asm_adjust): Implement TARGET_MD_ASM_ADJUST. (TARGET_MD_ASM_ADJUST): Likewise. * config/s390/vector.md (fprx2_to_tf): Rename from *fprx2_to_tf, add memory alternative. (tf_to_fprx2): New pattern. gcc/testsuite/ChangeLog: 2020-12-14 Ilya Leoshkevich <i...@linux.ibm.com> * gcc.target/s390/vector/long-double-asm-abi.c: New test. * gcc.target/s390/vector/long-double-asm-in-out.c: New test. * gcc.target/s390/vector/long-double-asm-inout.c: New test. * gcc.target/s390/vector/long-double-volatile-from-i64.c: New test. --- gcc/config/s390/s390.c | 73 +++++++++++++++++++ gcc/config/s390/vector.md | 36 +++++++-- .../s390/vector/long-double-asm-abi.c | 26 +++++++ .../s390/vector/long-double-asm-in-out.c | 14 ++++ .../s390/vector/long-double-asm-inout.c | 14 ++++ .../vector/long-double-volatile-from-i64.c | 22 ++++++ 6 files changed, 180 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 9d2cee950d0..a22fd9fe391 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16688,6 +16688,76 @@ s390_shift_truncation_mask (machine_mode mode) return mode == DImode || mode == SImode ? 
63 : 0; } +/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f" + constraints when long doubles are stored in vector registers. */ + +static rtx_insn * +s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + vec<machine_mode> &input_modes, + vec<const char *> &constraints, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/) +{ + if (!TARGET_VXE) + /* Long doubles are stored in FPR pairs - nothing to do. */ + return NULL; + + rtx_insn *after_md_seq = NULL, *after_md_end = NULL; + + unsigned ninputs = inputs.length (); + unsigned noutputs = outputs.length (); + for (unsigned i = 0; i < noutputs; i++) + { + if (GET_MODE (outputs[i]) != TFmode) + /* Not a long double - nothing to do. */ + continue; + const char *constraint = constraints[i]; + bool allows_mem, allows_reg, is_inout; + bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs, + &allows_mem, &allows_reg, &is_inout); + gcc_assert (ok); + if (strcmp (constraint, "=f") != 0) + /* Long double with a constraint other than "=f" - nothing to do. */ + continue; + gcc_assert (allows_reg); + gcc_assert (!allows_mem); + gcc_assert (!is_inout); + /* Copy output value from a FPR pair into a vector register. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + push_to_sequence2 (after_md_seq, after_md_end); + emit_insn (gen_fprx2_to_tf (outputs[i], fprx2)); + after_md_seq = get_insns (); + after_md_end = get_last_insn (); + end_sequence (); + outputs[i] = fprx2; + } + + for (unsigned i = 0; i < ninputs; i++) + { + if (GET_MODE (inputs[i]) != TFmode) + /* Not a long double - nothing to do. 
*/ + continue; + const char *constraint = constraints[noutputs + i]; + bool allows_mem, allows_reg; + bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0, + constraints.address (), &allows_mem, + &allows_reg); + gcc_assert (ok); + if (strcmp (constraint, "f") != 0 && strcmp (constraint, "=f") != 0) + /* Long double with a constraint other than "f" (or "=f" for inout + operands) - nothing to do. */ + continue; + gcc_assert (allows_reg); + gcc_assert (!allows_mem); + /* Copy input value from a vector register into a FPR pair. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i])); + inputs[i] = fprx2; + input_modes[i] = FPRX2mode; + } + + return after_md_seq; +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -16995,6 +17065,9 @@ s390_shift_truncation_mask (machine_mode mode) #undef TARGET_MAX_ANCHOR_OFFSET #define TARGET_MAX_ANCHOR_OFFSET 0xfff +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 0e3c31f5d4f..1332a65a1d1 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -616,12 +616,23 @@ (define_insn "*vec_tf_to_v1tf_vr" vlvgp\t%v0,%1,%N1" [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")]) -(define_insn "*fprx2_to_tf" - [(set (match_operand:TF 0 "nonimmediate_operand" "=v") - (subreg:TF (match_operand:FPRX2 1 "general_operand" "f") 0))] +(define_insn_and_split "fprx2_to_tf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=v,R") + (subreg:TF (match_operand:FPRX2 1 "general_operand" "f,f") 0))] "TARGET_VXE" - "vmrhg\t%v0,%1,%N1" - [(set_attr "op_type" "VRR")]) + "@ + vmrhg\t%v0,%1,%N1 + #" + "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0); + 
operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8); +} + [(set_attr "op_type" "VRR,*")]) (define_insn "*vec_ti_to_v1ti" [(set (match_operand:V1TI 0 "nonimmediate_operand" "=v,v,R, v, v,v") @@ -753,6 +764,21 @@ (define_insn "*tf_to_fprx2_1" "vpdi\t%V0,%v1,%V0,5" [(set_attr "op_type" "VRR")]) +(define_insn_and_split "tf_to_fprx2" + [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f") + (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,R") 0))] + "TARGET_VXE" + "#" + "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0); + operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); +}) + ; vec_perm_const for V2DI using vpdi? ;; diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c new file mode 100644 index 00000000000..9cd50b62b48 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double +xsqrt (long double x) +{ + long double res; + asm("sqxbr\t%0,%1" : "=f"(res) : "f"(x)); + return res; +} + +/* Check that the generated code is very small and straightforward. In + particular, there must be no unnecessary copying and no stack frame. 
*/ +/* { dg-final { scan-assembler {\n\tld\t.*\n\tld\t.*\n(#.*\n)*\tsqxbr\t.*\n\tstd\t.*\n\tstd\t.*\n\tbr\t%r14\n} } } */ + +int +main (void) +{ + long double res, x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + res = xsqrt (x); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c new file mode 100644 index 00000000000..5380311b435 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res, x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + asm("sqxbr %0,%1" : "=f"(res) : "f"(x)); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c new file mode 100644 index 00000000000..6dcd2dc8ac1 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + asm("sqxbr %0,%0" : "+f"(res)); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c new file mode 100644 index 00000000000..f4489841c28 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ 
+#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double +long_double_volatile_from_i64 (int64_t x) +{ + static volatile long double y; + y = x; + return y; +} + +/* { dg-final { scan-assembler-times {\n\tcxgbr\t} 1 } } */ + +int +main (void) +{ + assert (long_double_volatile_from_i64 (42) == 42.L); + assert (long_double_volatile_from_i64 (-42) == -42.L); +} -- 2.26.2