Ping
On Fri, Aug 16, 2024 at 09:41:55AM +0200, Stefan Schulze Frielinghaus wrote: > Currently subregs originating from *tf_to_fprx2_0 and *tf_to_fprx2_1 > survive register allocation. This in turn leads to wrong register > renaming. Keeping the current approach would mean we need two insns for > *tf_to_fprx2_0 and *tf_to_fprx2_1, respectively. Something along the > lines > > (define_insn "*tf_to_fprx2_0" > [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "=f") 0) > (unspec:DF [(match_operand:TF 1 "general_operand" "v")] > UNSPEC_TF_TO_FPRX2_0))] > "TARGET_VXE" > "#") > > (define_insn "*tf_to_fprx2_0" > [(set (match_operand:DF 0 "nonimmediate_operand" "=f") > (unspec:DF [(match_operand:TF 1 "general_operand" "v")] > UNSPEC_TF_TO_FPRX2_0))] > "TARGET_VXE" > "vpdi\t%v0,%v1,%v0,1 > [(set_attr "op_type" "VRR")]) > > and similar for *tf_to_fprx2_1. Note, pre register allocation operand 0 > has mode FPRX2 and afterwards DF once subregs have been eliminated. > > Since we always copy a whole vector register into a floating-point > register pair, another way to fix this is to merge *tf_to_fprx2_0 and > *tf_to_fprx2_1 into a single insn which means we don't have to use > subregs at all. The downside of this is that the assembler template > contains two instructions, now. The upside is that we don't have to > come up with some artificial insn before RA which might be more > readable/maintainable. That is implemented by this patch. > > In commit r11-4872-ge627cda5686592, the output operand specifier %V was > introduced which is used in tf_to_fprx2 only, now. I didn't come up > with its counterpart like %F for floating-point registers. Instead I > printed the register pair in the output function directly. This spares > us a new and "rare" format specifier for a single insn. I don't have a > strong opinion which option to choose, however, we should either add %F > in order to mimic the same behaviour as %V or getting rid of %V and > inline the logic in the output function. I lean towards the latter. > Any preferences? > --- > gcc/config/s390/s390.md | 2 + > gcc/config/s390/vector.md | 66 +++++++++++----------- > gcc/testsuite/gcc.target/s390/pr115860-1.c | 26 +++++++++ > 3 files changed, 60 insertions(+), 34 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/s390/pr115860-1.c > > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 3d5759d6252..31240899934 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -241,6 +241,8 @@ > UNSPEC_VEC_VFMIN > UNSPEC_VEC_VFMAX > > + UNSPEC_TF_TO_FPRX2 > + > UNSPEC_NNPA_VCLFNHS_V8HI > UNSPEC_NNPA_VCLFNLS_V8HI > UNSPEC_NNPA_VCRNFS_V8HI > diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md > index a75b7cb5825..561182e0c2c 100644 > --- a/gcc/config/s390/vector.md > +++ b/gcc/config/s390/vector.md > @@ -907,36 +907,36 @@ > "vmrlg\t%0,%1,%2"; > [(set_attr "op_type" "VRR")]) > > - > -(define_insn "*tf_to_fprx2_0" > - [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0) > - (subreg:DF (match_operand:TF 1 "general_operand" "v") 0))] > - "TARGET_VXE" > - ; M4 == 1 corresponds to %v0[0] = %v1[0]; %v0[1] = %v0[1]; > - "vpdi\t%v0,%v1,%v0,1" > - [(set_attr "op_type" "VRR")]) > - > -(define_insn "*tf_to_fprx2_1" > - [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 8) > - (subreg:DF (match_operand:TF 1 "general_operand" "v") 8))] > +(define_insn "tf_to_fprx2" > + [(set (match_operand:FPRX2 0 "register_operand" "=f,f ,f") > + (unspec:FPRX2 [(match_operand:TF 1 "general_operand" "v,AR,AT")] > + UNSPEC_TF_TO_FPRX2))] > "TARGET_VXE" > - ; M4 == 5 corresponds to %V0[0] = %v1[1]; %V0[1] = %V0[1]; > - "vpdi\t%V0,%v1,%V0,5" > - [(set_attr "op_type" "VRR")]) > - > -(define_insn_and_split "tf_to_fprx2" > - [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f") > - (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))] > - "TARGET_VXE" > - "#" > - "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" > - [(set (match_dup 2) (match_dup 3)) > - (set (match_dup 4) (match_dup 5))] > { > - operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0); > - operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0); > - operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8); > - operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); > + char buf[64]; > + switch (which_alternative) > + { > + case 0: > + if (REGNO (operands[0]) == REGNO (operands[1])) > + return "vpdi\t%V0,%v1,%V0,5"; > + else > + return "ldr\t%f0,%f1;vpdi\t%V0,%v1,%V0,5"; > + case 1: > + { > + const char *reg_pair = reg_names[REGNO (operands[0]) + 1]; > + snprintf (buf, sizeof (buf), "ld\t%%f0,%%1;ld\t%%%s,8+%%1", reg_pair); > + output_asm_insn (buf, operands); > + return ""; > + } > + case 2: > + { > + const char *reg_pair = reg_names[REGNO (operands[0]) + 1]; > + snprintf (buf, sizeof (buf), "ldy\t%%f0,%%1;ldy\t%%%s,8+%%1", reg_pair); > + output_asm_insn (buf, operands); > + return ""; > + } > + default: gcc_unreachable (); > + } > }) > > ;; VECTOR REVERSE ELEMENTS V16QI > @@ -2830,9 +2830,8 @@ > ; There is no instruction for rounding an extended BFP operand in a VR into > ; a signed integer, therefore copy it into a FPR pair first. > (define_expand "fix_trunctf<mode>2_vr" > - [(set (subreg:DF (match_dup 2) 0) > - (subreg:DF (match_operand:TF 1 "register_operand" "") 0)) > - (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8)) > + [(set (match_dup 2) > + (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] > UNSPEC_TF_TO_FPRX2)) > (parallel [(set (match_operand:GPR 0 "register_operand" "") > (fix:GPR (match_dup 2))) > (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND) > @@ -2863,9 +2862,8 @@ > ; There is no instruction for rounding an extended BFP operand in a VR into > ; an unsigned integer, therefore copy it into a FPR pair first. > (define_expand "fixuns_trunctf<mode>2_vr" > - [(set (subreg:DF (match_dup 2) 0) > - (subreg:DF (match_operand:TF 1 "register_operand" "") 0)) > - (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8)) > + [(set (match_dup 2) > + (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] > UNSPEC_TF_TO_FPRX2)) > (parallel [(set (match_operand:GPR 0 "register_operand" "") > (unsigned_fix:GPR (match_dup 2))) > (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND) > diff --git a/gcc/testsuite/gcc.target/s390/pr115860-1.c > b/gcc/testsuite/gcc.target/s390/pr115860-1.c > new file mode 100644 > index 00000000000..67d34a80f8e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/pr115860-1.c > @@ -0,0 +1,26 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target s390_vxe } */ > +/* { dg-options "-O2 -march=z14 -mzarch" } */ > + > +__attribute__ ((noipa)) > +long trunctf (long double x) > +{ > + /* Ensure via ++x that x is in a register. */ > + ++x; > + return x; > +} > + > +__attribute__ ((noipa)) > +long trunctf_from_mem (long double x) > +{ > + return x; > +} > + > +int main (void) > +{ > + if (trunctf (0x7ffffffffffffffeL) != 0x7fffffffffffffffL) > + __builtin_abort (); > + if (trunctf_from_mem (0x7fffffffffffffffL) != 0x7fffffffffffffffL) > + __builtin_abort (); > + return 0; > +} > -- > 2.45.2 >