Ping

On Fri, Aug 16, 2024 at 09:41:55AM +0200, Stefan Schulze Frielinghaus wrote:
> Currently subregs originating from *tf_to_fprx2_0 and *tf_to_fprx2_1
> survive register allocation.  This in turn leads to wrong register
> renaming.  Keeping the current approach would mean we need two insns for
> *tf_to_fprx2_0 and *tf_to_fprx2_1, respectively.  Something along the
> lines
> 
> (define_insn "*tf_to_fprx2_0"
>   [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "=f") 0)
>         (unspec:DF [(match_operand:TF 1 "general_operand" "v")]
>                    UNSPEC_TF_TO_FPRX2_0))]
>   "TARGET_VXE"
>   "#")
> 
> (define_insn "*tf_to_fprx2_0"
>   [(set (match_operand:DF 0 "nonimmediate_operand" "=f")
>         (unspec:DF [(match_operand:TF 1 "general_operand" "v")]
>                    UNSPEC_TF_TO_FPRX2_0))]
>   "TARGET_VXE"
>   "vpdi\t%v0,%v1,%v0,1
>   [(set_attr "op_type" "VRR")])
> 
> and similar for *tf_to_fprx2_1.  Note, pre register allocation operand 0
> has mode FPRX2 and afterwards DF once subregs have been eliminated.
> 
> Since we always copy a whole vector register into a floating-point
> register pair, another way to fix this is to merge *tf_to_fprx2_0 and
> *tf_to_fprx2_1 into a single insn which means we don't have to use
> subregs at all.  The downside of this is that the assembler template
> contains two instructions, now.  The upside is that we don't have to
> come up with some artificial insn before RA which might be more
> readable/maintainable.  That is implemented by this patch.
> 
> In commit r11-4872-ge627cda5686592, the output operand specifier %V was
> introduced which is used in tf_to_fprx2 only, now.  I didn't come up
> with its counterpart like %F for floating-point registers.  Instead I
> printed the register pair in the output function directly.  This spares
> us a new and "rare" format specifier for a single insn.  I don't have a
> strong opinion which option to choose, however, we should either add %F
> in order to mimic the same behaviour as %V or getting rid of %V and
> inline the logic in the output function.  I lean towards the latter.
> Any preferences?
> ---
>  gcc/config/s390/s390.md                    |  2 +
>  gcc/config/s390/vector.md                  | 66 +++++++++++-----------
>  gcc/testsuite/gcc.target/s390/pr115860-1.c | 26 +++++++++
>  3 files changed, 60 insertions(+), 34 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/pr115860-1.c
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 3d5759d6252..31240899934 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -241,6 +241,8 @@
>     UNSPEC_VEC_VFMIN
>     UNSPEC_VEC_VFMAX
>  
> +   UNSPEC_TF_TO_FPRX2
> +
>     UNSPEC_NNPA_VCLFNHS_V8HI
>     UNSPEC_NNPA_VCLFNLS_V8HI
>     UNSPEC_NNPA_VCRNFS_V8HI
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index a75b7cb5825..561182e0c2c 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -907,36 +907,36 @@
>    "vmrlg\t%0,%1,%2";
>    [(set_attr "op_type" "VRR")])
>  
> -
> -(define_insn "*tf_to_fprx2_0"
> -  [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0)
> -     (subreg:DF (match_operand:TF    1 "general_operand"       "v") 0))]
> -  "TARGET_VXE"
> -  ; M4 == 1 corresponds to %v0[0] = %v1[0]; %v0[1] = %v0[1];
> -  "vpdi\t%v0,%v1,%v0,1"
> -  [(set_attr "op_type" "VRR")])
> -
> -(define_insn "*tf_to_fprx2_1"
> -  [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 8)
> -     (subreg:DF (match_operand:TF    1 "general_operand"       "v") 8))]
> +(define_insn "tf_to_fprx2"
> +  [(set (match_operand:FPRX2             0 "register_operand" "=f,f ,f")
> +     (unspec:FPRX2 [(match_operand:TF 1 "general_operand"   "v,AR,AT")]
> +                   UNSPEC_TF_TO_FPRX2))]
>    "TARGET_VXE"
> -  ; M4 == 5 corresponds to %V0[0] = %v1[1]; %V0[1] = %V0[1];
> -  "vpdi\t%V0,%v1,%V0,5"
> -  [(set_attr "op_type" "VRR")])
> -
> -(define_insn_and_split "tf_to_fprx2"
> -  [(set (match_operand:FPRX2            0 "nonimmediate_operand" "=f,f")
> -     (subreg:FPRX2 (match_operand:TF 1 "general_operand"       "v,AR") 0))]
> -  "TARGET_VXE"
> -  "#"
> -  "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
> -  [(set (match_dup 2) (match_dup 3))
> -   (set (match_dup 4) (match_dup 5))]
>  {
> -  operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0);
> -  operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0);
> -  operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8);
> -  operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
> +  char buf[64];
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      if (REGNO (operands[0]) == REGNO (operands[1]))
> +     return "vpdi\t%V0,%v1,%V0,5";
> +      else
> +     return "ldr\t%f0,%f1;vpdi\t%V0,%v1,%V0,5";
> +    case 1:
> +      {
> +     const char *reg_pair = reg_names[REGNO (operands[0]) + 1];
> +     snprintf (buf, sizeof (buf), "ld\t%%f0,%%1;ld\t%%%s,8+%%1", reg_pair);
> +     output_asm_insn (buf, operands);
> +     return "";
> +      }
> +    case 2:
> +      {
> +     const char *reg_pair = reg_names[REGNO (operands[0]) + 1];
> +     snprintf (buf, sizeof (buf), "ldy\t%%f0,%%1;ldy\t%%%s,8+%%1", reg_pair);
> +     output_asm_insn (buf, operands);
> +     return "";
> +      }
> +    default: gcc_unreachable ();
> +    }
>  })
>  
>  ;; VECTOR REVERSE ELEMENTS V16QI
> @@ -2830,9 +2830,8 @@
>  ; There is no instruction for rounding an extended BFP operand in a VR into
>  ; a signed integer, therefore copy it into a FPR pair first.
>  (define_expand "fix_trunctf<mode>2_vr"
> -  [(set (subreg:DF (match_dup 2) 0)
> -     (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
> -   (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
> +  [(set (match_dup 2)
> +     (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] 
> UNSPEC_TF_TO_FPRX2))
>     (parallel [(set (match_operand:GPR 0 "register_operand" "")
>                  (fix:GPR (match_dup 2)))
>             (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
> @@ -2863,9 +2862,8 @@
>  ; There is no instruction for rounding an extended BFP operand in a VR into
>  ; an unsigned integer, therefore copy it into a FPR pair first.
>  (define_expand "fixuns_trunctf<mode>2_vr"
> -  [(set (subreg:DF (match_dup 2) 0)
> -     (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
> -   (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
> +  [(set (match_dup 2)
> +     (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] 
> UNSPEC_TF_TO_FPRX2))
>     (parallel [(set (match_operand:GPR 0 "register_operand" "")
>                  (unsigned_fix:GPR (match_dup 2)))
>             (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
> diff --git a/gcc/testsuite/gcc.target/s390/pr115860-1.c 
> b/gcc/testsuite/gcc.target/s390/pr115860-1.c
> new file mode 100644
> index 00000000000..67d34a80f8e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/pr115860-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target s390_vxe } */
> +/* { dg-options "-O2 -march=z14 -mzarch" } */
> +
> +__attribute__ ((noipa))
> +long trunctf (long double x)
> +{
> +  /* Ensure via ++x that x is in a register.  */
> +  ++x;
> +  return x;
> +}
> +
> +__attribute__ ((noipa))
> +long trunctf_from_mem (long double x)
> +{
> +  return x;
> +}
> +
> +int main (void)
> +{
> +  if (trunctf (0x7ffffffffffffffeL) != 0x7fffffffffffffffL)
> +    __builtin_abort ();
> +  if (trunctf_from_mem (0x7fffffffffffffffL) != 0x7fffffffffffffffL)
> +    __builtin_abort ();
> +  return 0;
> +}
> -- 
> 2.45.2
> 

Reply via email to