v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563799.html
v1 -> v2: Handle constraint modifiers, use AR constraint instead of R, add testcases for & and %. After switching the s390 backend to store long doubles in vector registers, the "f" constraint broke when used with long doubles: long doubles correspond to TFmode, which in combination with "f" corresponds to hard regs %v0-%v15; however, asm users expect a %f0-%f15 pair. Fix by using the TARGET_MD_ASM_ADJUST hook to convert TFmode values to FPRX2mode and back. gcc/ChangeLog: 2020-12-14 Ilya Leoshkevich <i...@linux.ibm.com> * config/s390/s390.c (f_constraint_p): New function. (s390_md_asm_adjust): Implement TARGET_MD_ASM_ADJUST. (TARGET_MD_ASM_ADJUST): Define. * config/s390/vector.md (fprx2_to_tf): Rename from *fprx2_to_tf, add memory alternative. (tf_to_fprx2): New pattern. gcc/testsuite/ChangeLog: 2020-12-14 Ilya Leoshkevich <i...@linux.ibm.com> * gcc.target/s390/vector/long-double-asm-abi.c: New test. * gcc.target/s390/vector/long-double-asm-commutative.c: New test. * gcc.target/s390/vector/long-double-asm-earlyclobber.c: New test. * gcc.target/s390/vector/long-double-asm-in-out.c: New test. * gcc.target/s390/vector/long-double-asm-inout.c: New test. * gcc.target/s390/vector/long-double-asm-matching.c: New test. * gcc.target/s390/vector/long-double-volatile-from-i64.c: New test.
--- gcc/config/s390/s390.c | 88 +++++++++++++++++++ gcc/config/s390/vector.md | 36 ++++++-- .../s390/vector/long-double-asm-abi.c | 26 ++++++ .../s390/vector/long-double-asm-commutative.c | 16 ++++ .../vector/long-double-asm-earlyclobber.c | 17 ++++ .../s390/vector/long-double-asm-in-out.c | 14 +++ .../s390/vector/long-double-asm-inout.c | 14 +++ .../s390/vector/long-double-asm-matching.c | 13 +++ .../vector/long-double-volatile-from-i64.c | 22 +++++ 9 files changed, 241 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-commutative.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-earlyclobber.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-asm-matching.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 9d2cee950d0..d4b098325e8 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16688,6 +16688,91 @@ s390_shift_truncation_mask (machine_mode mode) return mode == DImode || mode == SImode ? 63 : 0; } +/* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional + modifiers. */ + +static bool +f_constraint_p (const char *constraint) +{ + for (size_t i = 0, c_len = strlen (constraint); i < c_len; + i += CONSTRAINT_LEN (constraint[i], constraint + i)) + { + if (constraint[i] == 'f') + return true; + } + return false; +} + +/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f" + constraints when long doubles are stored in vector registers. 
*/ + +static rtx_insn * +s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + vec<machine_mode> &input_modes, + vec<const char *> &constraints, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/) +{ + if (!TARGET_VXE) + /* Long doubles are stored in FPR pairs - nothing to do. */ + return NULL; + + rtx_insn *after_md_seq = NULL, *after_md_end = NULL; + + unsigned ninputs = inputs.length (); + unsigned noutputs = outputs.length (); + for (unsigned i = 0; i < noutputs; i++) + { + if (GET_MODE (outputs[i]) != TFmode) + /* Not a long double - nothing to do. */ + continue; + const char *constraint = constraints[i]; + bool allows_mem, allows_reg, is_inout; + bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs, + &allows_mem, &allows_reg, &is_inout); + gcc_assert (ok); + if (!f_constraint_p (constraint + 1)) + /* Long double with a constraint other than "=f" - nothing to do. */ + continue; + gcc_assert (allows_reg); + gcc_assert (!allows_mem); + gcc_assert (!is_inout); + /* Copy output value from a FPR pair into a vector register. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + push_to_sequence2 (after_md_seq, after_md_end); + emit_insn (gen_fprx2_to_tf (outputs[i], fprx2)); + after_md_seq = get_insns (); + after_md_end = get_last_insn (); + end_sequence (); + outputs[i] = fprx2; + } + + for (unsigned i = 0; i < ninputs; i++) + { + if (GET_MODE (inputs[i]) != TFmode) + /* Not a long double - nothing to do. */ + continue; + const char *constraint = constraints[noutputs + i]; + bool allows_mem, allows_reg; + bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0, + constraints.address (), &allows_mem, + &allows_reg); + gcc_assert (ok); + if (!f_constraint_p (*constraint == '=' ? constraint + 1 : constraint)) + /* Long double with a constraint other than "f" (or "=f" for inout + operands) - nothing to do. 
*/ + continue; + gcc_assert (allows_reg); + gcc_assert (!allows_mem); + /* Copy input value from a vector register into a FPR pair. */ + rtx fprx2 = gen_reg_rtx (FPRX2mode); + emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i])); + inputs[i] = fprx2; + input_modes[i] = FPRX2mode; + } + + return after_md_seq; +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -16995,6 +17080,9 @@ s390_shift_truncation_mask (machine_mode mode) #undef TARGET_MAX_ANCHOR_OFFSET #define TARGET_MAX_ANCHOR_OFFSET 0xfff +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 0e3c31f5d4f..e48c965db00 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -616,12 +616,23 @@ (define_insn "*vec_tf_to_v1tf_vr" vlvgp\t%v0,%1,%N1" [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")]) -(define_insn "*fprx2_to_tf" - [(set (match_operand:TF 0 "nonimmediate_operand" "=v") - (subreg:TF (match_operand:FPRX2 1 "general_operand" "f") 0))] +(define_insn_and_split "fprx2_to_tf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=v,AR") + (subreg:TF (match_operand:FPRX2 1 "general_operand" "f,f") 0))] "TARGET_VXE" - "vmrhg\t%v0,%1,%N1" - [(set_attr "op_type" "VRR")]) + "@ + vmrhg\t%v0,%1,%N1 + #" + "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0); + operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8); +} + [(set_attr "op_type" "VRR,*")]) (define_insn "*vec_ti_to_v1ti" [(set (match_operand:V1TI 0 "nonimmediate_operand" "=v,v,R, v, v,v") @@ -753,6 +764,21 @@ (define_insn "*tf_to_fprx2_1" 
"vpdi\t%V0,%v1,%V0,5" [(set_attr "op_type" "VRR")]) +(define_insn_and_split "tf_to_fprx2" + [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f") + (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))] + "TARGET_VXE" + "#" + "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0); + operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0); + operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8); + operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); +}) + ; vec_perm_const for V2DI using vpdi? ;; diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c new file mode 100644 index 00000000000..9cd50b62b48 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double +xsqrt (long double x) +{ + long double res; + asm("sqxbr\t%0,%1" : "=f"(res) : "f"(x)); + return res; +} + +/* Check that the generated code is very small and straightforward. In + particular, there must be no unnecessary copying and no stack frame. 
*/ +/* { dg-final { scan-assembler {\n\tld\t.*\n\tld\t.*\n(#.*\n)*\tsqxbr\t.*\n\tstd\t.*\n\tstd\t.*\n\tbr\t%r14\n} } } */ + +int +main (void) +{ + long double res, x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + res = xsqrt (x); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-commutative.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-commutative.c new file mode 100644 index 00000000000..59d807c54b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-commutative.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res, x = 40., y = 2.; + asm("lxr\t%0,%1\n" + "\taxbr\t%0,%2" + : "=&f"(res) + : "%f"(x), "f"(y)); + assert (res == 42.); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-earlyclobber.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-earlyclobber.c new file mode 100644 index 00000000000..5dd027596de --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-earlyclobber.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res, x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + asm("lzxr\t%0\n" + "\tsqxbr\t%0,%1" + : "=&f"(res) + : "f"(x)); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c new file mode 100644 index 00000000000..27d447f6898 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } 
} } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res, x = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + asm("sqxbr\t%0,%1" : "=f"(res) : "f"(x)); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c new file mode 100644 index 00000000000..e0b6ac518aa --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res = 0x1.0000000000001p+0L, + exp = 1.00000000000000011102230246251564788e+0L; + asm("sqxbr\t%0,%0" : "+f"(res)); + assert (res == exp); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-matching.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-matching.c new file mode 100644 index 00000000000..c8b8c3d1c6e --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-matching.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +int +main (void) +{ + long double res, x = 40., y = 2.; + asm("axbr\t%0,%2" : "=f"(res) : "0"(x), "f"(y)); + assert (res == 42.); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c new file mode 100644 index 00000000000..f4489841c28 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z14_hw } } } */ +#include <assert.h> +#include <stdint.h> + +__attribute__ ((noipa)) static long double 
+long_double_volatile_from_i64 (int64_t x) +{ + static volatile long double y; + y = x; + return y; +} + +/* { dg-final { scan-assembler-times {\n\tcxgbr\t} 1 } } */ + +int +main (void) +{ + assert (long_double_volatile_from_i64 (42) == 42.L); + assert (long_double_volatile_from_i64 (-42) == -42.L); +} -- 2.26.2