Currently subregs originating from *tf_to_fprx2_0 and *tf_to_fprx2_1 survive register allocation. This in turn leads to wrong register renaming. Keeping the current approach would mean we need two insns for *tf_to_fprx2_0 and *tf_to_fprx2_1, respectively. Something along the lines
(define_insn "*tf_to_fprx2_0" [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "=f") 0) (unspec:DF [(match_operand:TF 1 "general_operand" "v")] UNSPEC_TF_TO_FPRX2_0))] "TARGET_VXE" "#") (define_insn "*tf_to_fprx2_0" [(set (match_operand:DF 0 "nonimmediate_operand" "=f") (unspec:DF [(match_operand:TF 1 "general_operand" "v")] UNSPEC_TF_TO_FPRX2_0))] "TARGET_VXE" "vpdi\t%v0,%v1,%v0,1 [(set_attr "op_type" "VRR")]) and similar for *tf_to_fprx2_1. Note, pre register allocation operand 0 has mode FPRX2 and afterwards DF once subregs have been eliminated. Since we always copy a whole vector register into a floating-point register pair, another way to fix this is to merge *tf_to_fprx2_0 and *tf_to_fprx2_1 into a single insn which means we don't have to use subregs at all. The downside of this is that the assembler template contains two instructions, now. The upside is that we don't have to come up with some artificial insn before RA which might be more readable/maintainable. That is implemented by this patch. In commit r11-4872-ge627cda5686592, the output operand specifier %V was introduced which is used in tf_to_fprx2 only, now. I didn't come up with its counterpart like %F for floating-point registers. Instead I printed the register pair in the output function directly. This spares us a new and "rare" format specifier for a single insn. I don't have a strong opinion which option to choose, however, we should either add %F in order to mimic the same behaviour as %V or getting rid of %V and inline the logic in the output function. I lean towards the latter. Any preferences? --- gcc/config/s390/s390.md | 2 + gcc/config/s390/vector.md | 66 +++++++++++----------- gcc/testsuite/gcc.target/s390/pr115860-1.c | 26 +++++++++ 3 files changed, 60 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/pr115860-1.c diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 3d5759d6252..31240899934 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -241,6 +241,8 @@ UNSPEC_VEC_VFMIN UNSPEC_VEC_VFMAX + UNSPEC_TF_TO_FPRX2 + UNSPEC_NNPA_VCLFNHS_V8HI UNSPEC_NNPA_VCLFNLS_V8HI UNSPEC_NNPA_VCRNFS_V8HI diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index a75b7cb5825..561182e0c2c 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -907,36 +907,36 @@ "vmrlg\t%0,%1,%2"; [(set_attr "op_type" "VRR")]) - -(define_insn "*tf_to_fprx2_0" - [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0) - (subreg:DF (match_operand:TF 1 "general_operand" "v") 0))] - "TARGET_VXE" - ; M4 == 1 corresponds to %v0[0] = %v1[0]; %v0[1] = %v0[1]; - "vpdi\t%v0,%v1,%v0,1" - [(set_attr "op_type" "VRR")]) - -(define_insn "*tf_to_fprx2_1" - [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 8) - (subreg:DF (match_operand:TF 1 "general_operand" "v") 8))] +(define_insn "tf_to_fprx2" + [(set (match_operand:FPRX2 0 "register_operand" "=f,f ,f") + (unspec:FPRX2 [(match_operand:TF 1 "general_operand" "v,AR,AT")] + UNSPEC_TF_TO_FPRX2))] "TARGET_VXE" - ; M4 == 5 corresponds to %V0[0] = %v1[1]; %V0[1] = %V0[1]; - "vpdi\t%V0,%v1,%V0,5" - [(set_attr "op_type" "VRR")]) - -(define_insn_and_split "tf_to_fprx2" - [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f") - (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))] - "TARGET_VXE" - "#" - "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" - [(set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (match_dup 5))] { - operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0); - operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0); - operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8); - operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); + char buf[64]; + switch (which_alternative) + { + case 0: + if (REGNO (operands[0]) == REGNO (operands[1])) + return "vpdi\t%V0,%v1,%V0,5"; + else + return "ldr\t%f0,%f1;vpdi\t%V0,%v1,%V0,5"; + case 1: + { + const char *reg_pair = reg_names[REGNO (operands[0]) + 1]; + snprintf (buf, sizeof (buf), "ld\t%%f0,%%1;ld\t%%%s,8+%%1", reg_pair); + output_asm_insn (buf, operands); + return ""; + } + case 2: + { + const char *reg_pair = reg_names[REGNO (operands[0]) + 1]; + snprintf (buf, sizeof (buf), "ldy\t%%f0,%%1;ldy\t%%%s,8+%%1", reg_pair); + output_asm_insn (buf, operands); + return ""; + } + default: gcc_unreachable (); + } }) ;; VECTOR REVERSE ELEMENTS V16QI @@ -2830,9 +2830,8 @@ ; There is no instruction for rounding an extended BFP operand in a VR into ; a signed integer, therefore copy it into a FPR pair first. (define_expand "fix_trunctf<mode>2_vr" - [(set (subreg:DF (match_dup 2) 0) - (subreg:DF (match_operand:TF 1 "register_operand" "") 0)) - (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8)) + [(set (match_dup 2) + (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2)) (parallel [(set (match_operand:GPR 0 "register_operand" "") (fix:GPR (match_dup 2))) (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND) @@ -2863,9 +2862,8 @@ ; There is no instruction for rounding an extended BFP operand in a VR into ; an unsigned integer, therefore copy it into a FPR pair first. (define_expand "fixuns_trunctf<mode>2_vr" - [(set (subreg:DF (match_dup 2) 0) - (subreg:DF (match_operand:TF 1 "register_operand" "") 0)) - (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8)) + [(set (match_dup 2) + (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2)) (parallel [(set (match_operand:GPR 0 "register_operand" "") (unsigned_fix:GPR (match_dup 2))) (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND) diff --git a/gcc/testsuite/gcc.target/s390/pr115860-1.c b/gcc/testsuite/gcc.target/s390/pr115860-1.c new file mode 100644 index 00000000000..67d34a80f8e --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/pr115860-1.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-require-effective-target s390_vxe } */ +/* { dg-options "-O2 -march=z14 -mzarch" } */ + +__attribute__ ((noipa)) +long trunctf (long double x) +{ + /* Ensure via ++x that x is in a register. */ + ++x; + return x; +} + +__attribute__ ((noipa)) +long trunctf_from_mem (long double x) +{ + return x; +} + +int main (void) +{ + if (trunctf (0x7ffffffffffffffeL) != 0x7fffffffffffffffL) + __builtin_abort (); + if (trunctf_from_mem (0x7fffffffffffffffL) != 0x7fffffffffffffffL) + __builtin_abort (); + return 0; +} -- 2.45.2