Re: [Patch 4/4][Aarch64] v2: Implement Aarch64 SIMD ABI

Richard Sandiford Wed, 09 Jan 2019 02:01:32 -0800

Steve Ellcey <sell...@marvell.com> writes:
>  /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
>     the lower 64 bits of a 128-bit register.  Tell the compiler the callee
>     clobbers the top 64 bits when restoring the bottom 64 bits.  */
>  
>  static bool
> -aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode 
> mode)
> +aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno,
> +                                     machine_mode mode)
>  {
> -  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
> +  bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn);
> +  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16: 
> 8);


Should be a space before the ":" (which pushes the line over 80 chars).

> diff --git a/gcc/hooks.h b/gcc/hooks.h
> index 9e4bc29..dc6b2e1 100644
> --- a/gcc/hooks.h
> +++ b/gcc/hooks.h
> @@ -40,7 +40,9 @@ extern bool hook_bool_const_rtx_insn_const_rtx_insn_true 
> (const rtx_insn *,
>  extern bool hook_bool_mode_uhwi_false (machine_mode,
>                                      unsigned HOST_WIDE_INT);
>  extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64);
> -extern bool hook_bool_uint_mode_false (unsigned int, machine_mode);
> +extern bool hook_bool_insn_uint_mode_false (rtx_insn *,
> +                                         unsigned int,
> +                                         machine_mode);

No need to break the line after "rtx_insn *,".

>  extern bool hook_bool_uint_mode_true (unsigned int, machine_mode);
>  extern bool hook_bool_tree_false (tree);
>  extern bool hook_bool_const_tree_false (const_tree);
> diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c
> index b57468b..b697e57 100644
> --- a/gcc/ira-conflicts.c
> +++ b/gcc/ira-conflicts.c
> @@ -808,7 +808,8 @@ ira_build_conflicts (void)
>                regs must conflict with them.  */
>             for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>               if (!TEST_HARD_REG_BIT (call_used_reg_set, regno)
> -                 && targetm.hard_regno_call_part_clobbered (regno,
> +                 && targetm.hard_regno_call_part_clobbered (NULL,
> +                                                            regno,
>                                                              obj_mode))
>                 {
>                   SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno);

No need to break the line after "NULL,".

> diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
> index e5d8804..7f60712 100644
> --- a/gcc/ira-costs.c
> +++ b/gcc/ira-costs.c
> @@ -2379,7 +2379,8 @@ ira_tune_allocno_costs (void)
>                                                  *crossed_calls_clobber_regs)
>                 && (ira_hard_reg_set_intersection_p (regno, mode,
>                                                      call_used_reg_set)
> -                   || targetm.hard_regno_call_part_clobbered (regno,
> +                   || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                              regno,
>                                                                mode)))
>               cost += (ALLOCNO_CALL_FREQ (a)
>                        * (ira_memory_move_cost[mode][rclass][0]

Same here.

> diff --git a/gcc/lra-int.h b/gcc/lra-int.h
> index 9d9e81d..ccc7b00 100644
> --- a/gcc/lra-int.h
> +++ b/gcc/lra-int.h
> @@ -117,6 +117,8 @@ struct lra_reg
>    /* This member is set up in lra-lives.c for subsequent
>       assignments.  */
>    lra_copy_t copies;
> +  /* Call instruction that may affect this register.  */
> +  rtx_insn *call_insn;
>  };
>  
>  /* References to the common info about each register.  */

If we do this right, I think the new field should be able to replace call_p.
The pseudo crosses a call iff call_insn is nonnull.

I think the field belongs after:

  poly_int64 offset;

since it comes under:

  /* The following fields are defined only for pseudos.  */

rather than:

  /* These members are set up in lra-lives.c and updated in
     lra-coalesce.c.  */

> diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
> index 7b60691..0b96891 100644
> --- a/gcc/lra-lives.c
> +++ b/gcc/lra-lives.c
> @@ -579,18 +579,26 @@ lra_setup_reload_pseudo_preferenced_hard_reg (int regno,
>     PSEUDOS_LIVE_THROUGH_CALLS and PSEUDOS_LIVE_THROUGH_SETJUMPS.  */
>  static inline void
>  check_pseudos_live_through_calls (int regno,
> -                               HARD_REG_SET last_call_used_reg_set)
> +                               HARD_REG_SET last_call_used_reg_set,
> +                               rtx_insn *call_insn)
>  {
>    int hr;
>  
> +  if (call_insn && CALL_P (call_insn) && 
> targetm.return_call_with_max_clobbers)
> +    lra_reg_info[regno].call_insn =
> +      targetm.return_call_with_max_clobbers (call_insn,
> +                                          lra_reg_info[regno].call_insn);
> +

This should happen...

>    if (! sparseset_bit_p (pseudos_live_through_calls, regno))
>      return;

...here, where we know that regno is live across a call like that
described by call_insn.

call_insn should be nonnull and a CALL_P in that case.  We should assert
for that regardless of whether targetm.return_call_with_max_clobbers is
nonnull.

The update should be something like:

  rtx_insn *old_call_insn = lra_reg_info[regno].call_insn;
  if (!old_call_insn
      || (targetm.return_call_with_max_clobbers
          && targetm.return_call_with_max_clobbers (old_call_insn
                                                    call_insn) == call_insn))
    lra_reg_info[regno].call_insn = call_insn;

>    sparseset_clear_bit (pseudos_live_through_calls, regno);
>    IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs,
>                   last_call_used_reg_set);
>  
>    for (hr = 0; HARD_REGISTER_NUM_P (hr); hr++)
> -    if (targetm.hard_regno_call_part_clobbered (hr,
> +    if (targetm.hard_regno_call_part_clobbered (call_insn,
> +                                             hr,
>                                               PSEUDO_REGNO_MODE (regno)))
>        add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs,
>                          PSEUDO_REGNO_MODE (regno), hr);

No need to break the line after "call_insn,".

> @@ -658,6 +667,17 @@ process_bb_lives (basic_block bb, int &curr_point, bool 
> dead_insn_p)
>    if (lra_dump_file != NULL)
>      fprintf (lra_dump_file, "  BB %d\n", bb->index);
>  
> +  call_insn = NULL;
> +  if (targetm.return_call_with_max_clobbers)
> +    {
> +      FOR_BB_INSNS_REVERSE_SAFE (bb, curr_insn, next)
> +     {
> +       if (CALL_P (curr_insn))
> +         call_insn = targetm.return_call_with_max_clobbers (curr_insn,
> +                                                            call_insn);
> +     }
> +    }
> +
>    /* Scan the code of this basic block, noting which pseudos and hard
>       regs are born or die.
>  

What I meant about keeping track of the current call during the main
block walk is that we shouldn't have this loop.  Instead we should
update call_insn when we see a CALL_P...

> @@ -847,7 +867,8 @@ process_bb_lives (basic_block bb, int &curr_point, bool 
> dead_insn_p)
>             update_pseudo_point (reg->regno, curr_point, USE_POINT);
>             mark_regno_live (reg->regno, reg->biggest_mode);
>             check_pseudos_live_through_calls (reg->regno,
> -                                             last_call_used_reg_set);
> +                                             last_call_used_reg_set,
> +                                             call_insn);
>           }
>  
>         if (!HARD_REGISTER_NUM_P (reg->regno))
> @@ -896,7 +917,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool 
> dead_insn_p)
>  
>        if (call_p)
>       {
> -       if (! flag_ipa_ra)
> +       if (! flag_ipa_ra && ! targetm.return_call_with_max_clobbers)
>           COPY_HARD_REG_SET(last_call_used_reg_set, call_used_reg_set);
>         else
>           {

...here.  In the "if" arm we can set call_insn to the current instruction,
because any call will do.  In the "else" arm we should extend this:

              bool flush = (! hard_reg_set_empty_p (last_call_used_reg_set)
                            && ! hard_reg_set_equal_p (last_call_used_reg_set,
                                                       this_call_used_reg_set))

so that we also flush when the old call insn part-clobbers a
different set of registers.

The idea is that we're trying to do something similar to -fipa-ra:
the registers clobbered by a call depend on the call_insn.  -fipa-ra
does that by dividing the block into regions with the same call
behaviour.  Then:

- pseudos_live_through_calls says which registers are live across
  a call in the current region

- last_call_used_reg_set describes the set of registers that are
  clobbered by calls in the current region

A new region starts whenever we find a call instruction that clobbers a
different set of registers from those in last_call_used_reg_set (whether
that's more registers or fewer).

We want to divide the block into regions in a similar way for
return_call_with_max_clobbers.  A new region starts whenever the
new call preserves a different set of registers from the original call.
This can be tested by something like:

/* Return true if call instructions CALL1 and CALL2 use ABIs that
   preserve the same set of registers.  */

static bool
calls_have_same_clobbers_p (rtx_insn *call1, rtx_insn *call2)
{
  if (!targetm.return_call_with_max_clobbers)
    return false;

  return (targetm.return_call_with_max_clobbers (call1, call2) == call1
          && targetm.return_call_with_max_clobbers (call2, call1) == call2);
}

I think it would make sense for return_call_with_max_clobbers to only
handle nonnull isnsn (I probably said otherwise earlier, sorry).

> diff --git a/gcc/reload.c b/gcc/reload.c
> index 6cfd5e2..bff84da 100644
> --- a/gcc/reload.c
> +++ b/gcc/reload.c
> @@ -6912,13 +6912,16 @@ find_equiv_reg (rtx goal, rtx_insn *insn, enum 
> reg_class rclass, int other,
>         if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER)
>           for (i = 0; i < nregs; ++i)
>             if (call_used_regs[regno + i]
> -               || targetm.hard_regno_call_part_clobbered (regno + i, mode))
> +               || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                          regno + i,
> +                                                          mode))
>               return 0;
>  
>         if (valueno >= 0 && valueno < FIRST_PSEUDO_REGISTER)
>           for (i = 0; i < valuenregs; ++i)
>             if (call_used_regs[valueno + i]
> -               || targetm.hard_regno_call_part_clobbered (valueno + i,
> +               || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                          valueno + i,
>                                                            mode))
>               return 0;
>       }

No need to split the lines after "NULL,".

> diff --git a/gcc/target.def b/gcc/target.def
> index e8f0f70..839d0b9 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -5766,14 +5766,29 @@ DEFHOOK
>  (hard_regno_call_part_clobbered,
>   "This hook should return true if @var{regno} is partly call-saved and\n\
>  partly call-clobbered, and if a value of mode @var{mode} would be partly\n\
> -clobbered by a call.  For example, if the low 32 bits of @var{regno} are\n\
> -preserved across a call but higher bits are clobbered, this hook should\n\
> -return true for a 64-bit mode but false for a 32-bit mode.\n\
> +clobbered by the @var{insn} call.  If @var{insn} is NULL then it should\n\

maybe "by call instruction @var{insn}".

Thanks,
Richard

Re: [Patch 4/4][Aarch64] v2: Implement Aarch64 SIMD ABI

Reply via email to