Ping

On Fri, Aug 16, 2024 at 09:14:02AM +0200, Stefan Schulze Frielinghaus wrote:
> In s390_expand_insv(), if generating code for ICM et al. src is a MEM
> and gen_lowpart might force src into a register such that we end up with
> patterns which do not match anymore.  Use adjust_address() instead in
> order to preserve a MEM.
> 
> Furthermore, it is not straight forward to enforce a subreg.  For
> example, in case of a paradoxical subreg, gen_lowpart() may return a
> register.  In order to compensate this, s390_gen_lowpart_subreg() emits
> a reference to a pseudo which does not coincide with its definition
> which is wrong.  Additionally, if dest is a paradoxical subreg, then do
> not try to emit a strict_low_part since it could mean that dest was not
> initialized even though this might be fixed up later by init-regs.
> 
> Splitter for insn *get_tp_64, *zero_extendhisi2_31,
> *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
> Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
> just returns the hard register for mode m which is fine to use as an
> argument for strict_low_part, i.e., we do not need to enforce subregs
> here since after reload subregs are supposed to be eliminated anyway.
> 
> This fixes gcc.dg/torture/pr111821.c.
> 
> gcc/ChangeLog:
> 
>       * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
>       * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
>       (s390_expand_insv): Use adjust_address() and emit a
>       strict_low_part only in case of a natural subreg.
>       * config/s390/s390.md: Use gen_lowpart() instead of
>       s390_gen_lowpart_subreg().
> ---
>  Bootstrapped and regtested on s390.  Ok for mainline,gcc12,gcc13,gcc14?
> 
>  gcc/config/s390/s390-protos.h |  1 -
>  gcc/config/s390/s390.cc       | 47 +++++++++++------------------------
>  gcc/config/s390/s390.md       | 13 +++++-----
>  3 files changed, 20 insertions(+), 41 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index b4646ccb606..e7ac59d17da 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool);
>  extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
>  extern int s390_class_max_nregs (enum reg_class, machine_mode);
>  extern bool s390_return_addr_from_memory(void);
> -extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
>  extern bool s390_fma_allowed_p (machine_mode);
>  #if S390_USE_TARGET_ATTRIBUTE
>  extern tree s390_valid_target_attribute_tree (tree args,
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 7aea776da2f..7cdcebfc08b 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -516,31 +516,6 @@ s390_return_addr_from_memory ()
>    return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
>  }
>  
> -/* Generate a SUBREG for the MODE lowpart of EXPR.
> -
> -   In contrast to gen_lowpart it will always return a SUBREG
> -   expression.  This is useful to generate STRICT_LOW_PART
> -   expressions.  */
> -rtx
> -s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
> -{
> -  rtx lowpart = gen_lowpart (mode, expr);
> -
> -  /* There might be no SUBREG in case it could be applied to the hard
> -     REG rtx or it could be folded with a paradoxical subreg.  Bring
> -     it back.  */
> -  if (!SUBREG_P (lowpart))
> -    {
> -      machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
> -      gcc_assert (REG_P (lowpart));
> -      lowpart = gen_lowpart_SUBREG (mode,
> -                                 gen_rtx_REG (reg_mode,
> -                                              REGNO (lowpart)));
> -    }
> -
> -  return lowpart;
> -}
> -
>  /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
>  bool
>  s390_fma_allowed_p (machine_mode mode)
> @@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
>        /* Emit a strict_low_part pattern if possible.  */
>        if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
>       {
> -       rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
> -       rtx low_src = gen_lowpart (smode, src);
> -
> -       switch (smode)
> +       rtx low_dest = gen_lowpart (smode, dest);
> +       if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
>           {
> -         case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); 
> return true;
> -         case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); 
> return true;
> -         case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); 
> return true;
> -         default: break;
> +           poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
> +           rtx low_src = adjust_address (src, smode, offset);
> +           switch (smode)
> +             {
> +             case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src));
> +                            return true;
> +             case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src));
> +                            return true;
> +             case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src));
> +                            return true;
> +             default: break;
> +             }
>           }
>       }
>  
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 3d5759d6252..592cf62d962 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -1974,12 +1974,11 @@
>    "TARGET_ZARCH"
>    "#"
>    "&& reload_completed"
> -  [(set (match_dup 2) (match_dup 4))
> +  [(set (match_dup 2) (match_dup 3))
>     (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
> -   (set (strict_low_part (match_dup 3)) (match_dup 5))]
> +   (set (strict_low_part (match_dup 2)) (match_dup 4))]
>    "operands[2] = gen_lowpart (SImode, operands[0]);
> -   operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]);
> -   s390_split_access_reg (operands[1], &operands[5], &operands[4]);")
> +   s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
>  
>  ; Splitters for storing TLS pointer to %a0:DI.
>  
> @@ -5068,7 +5067,7 @@
>     (parallel
>      [(set (strict_low_part (match_dup 2)) (match_dup 1))
>       (clobber (reg:CC CC_REGNUM))])]
> -  "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);")
> +  "operands[2] = gen_lowpart (HImode, operands[0]);")
>  
>  (define_insn_and_split "*zero_extendqisi2_31"
>    [(set (match_operand:SI 0 "register_operand" "=&d")
> @@ -5078,7 +5077,7 @@
>    "&& reload_completed"
>    [(set (match_dup 0) (const_int 0))
>     (set (strict_low_part (match_dup 2)) (match_dup 1))]
> -  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
> +  "operands[2] = gen_lowpart (QImode, operands[0]);")
>  
>  ;
>  ; zero_extendqihi2 instruction pattern(s).
> @@ -5110,7 +5109,7 @@
>    "&& reload_completed"
>    [(set (match_dup 0) (const_int 0))
>     (set (strict_low_part (match_dup 2)) (match_dup 1))]
> -  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
> +  "operands[2] = gen_lowpart (QImode, operands[0]);")
>  
>  ;
>  ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander
> -- 
> 2.45.2
> 

Reply via email to