On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.ch...@gmail.com> wrote:
>
> Sounds like we could just use !tune_param->slow_unaligned_access for
> TARGET_OVERLAP_OP_BY_PIECES_P?
> since it improves both performance and code size if we have cheap
> unaligned accesses.

Fine for me as well.
I'll prepare a v2 that enables overlap_op_by_pieces if
slow_unaligned_access==false.

>
> On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.ch...@gmail.com> wrote:
> > >
> > > It's my first time seeing this hook :p Would you mind describing when we
> > > need to set it to true?
> > > I mean, when a CPU has which feature can/should we set it to true?
> >
> > The by-pieces infrastructure allows builtins to be inlined quite well and
> > uses slow_unaligned_access and overlap_op_by_pieces to tune the
> > emitted instruction sequence.
> >
> > In case unaligned accesses are fast, overlap_op_by_pieces can reduce
> > the number of instructions (emitted by by-pieces for e.g. memset) at the
> > cost of overlapping memory accesses.
> >
> > E.g. if you want to clear 15 bytes with memset (and unaligned accesses are cheap):
> > Without overlap_op_by_pieces you will get:
> >   8e:   00053023                sd      zero,0(a0)
> >   92:   00052423                sw      zero,8(a0)
> >   96:   00051623                sh      zero,12(a0)
> >   9a:   00050723                sb      zero,14(a0)
> > With overlap_op_by_pieces you will get:
> >   7e:   00053023                sd      zero,0(a0)
> >   82:   000533a3                sd      zero,7(a0)
> >
> > BR
> > Christoph
> >
> > >
> > >
> > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > This patch adds the field overlap_op_by_pieces to the struct
> > > > riscv_tune_param, which allows enabling the overlap_op_by_pieces
> > > > feature of the by-pieces infrastructure.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> > > >         (riscv_overlap_op_by_pieces): New function.
> > > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> > > >         riscv_overlap_op_by_pieces.
> > > >
> > > > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org>
> > > > ---
> > > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> > > >  1 file changed, 14 insertions(+)
> > > >
> > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > > > index 576960bb37c..824e930ef05 100644
> > > > --- a/gcc/config/riscv/riscv.c
> > > > +++ b/gcc/config/riscv/riscv.c
> > > > @@ -220,6 +220,7 @@ struct riscv_tune_param
> > > >    unsigned short branch_cost;
> > > >    unsigned short memory_cost;
> > > >    bool slow_unaligned_access;
> > > > +  bool overlap_op_by_pieces;
> > > >  };
> > > >
> > > >  /* Information about one micro-arch we know about.  */
> > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param 
> > > > rocket_tune_info = {
> > > >    3,                                           /* branch_cost */
> > > >    5,                                           /* memory_cost */
> > > >    true,                                                /* 
> > > > slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces 
> > > > */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param 
> > > > sifive_7_tune_info = {
> > > >    4,                                           /* branch_cost */
> > > >    3,                                           /* memory_cost */
> > > >    true,                                                /* 
> > > > slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces 
> > > > */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for T-HEAD c906.  */
> > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param 
> > > > thead_c906_tune_info = {
> > > >    3,            /* branch_cost */
> > > >    5,            /* memory_cost */
> > > >    false,            /* slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces 
> > > > */
> > > >  };
> > > >
> > > >  /* Costs to use when optimizing for size.  */
> > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param 
> > > > optimize_size_tune_info = {
> > > >    1,                                           /* branch_cost */
> > > >    2,                                           /* memory_cost */
> > > >    false,                                       /* 
> > > > slow_unaligned_access */
> > > > +  false,                                       /* overlap_op_by_pieces 
> > > > */
> > > >  };
> > > >
> > > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, 
> > > > bool *);
> > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, 
> > > > unsigned int)
> > > >    return riscv_slow_unaligned_access_p;
> > > >  }
> > > >
> > > > +static bool
> > > > +riscv_overlap_op_by_pieces (void)
> > > > +{
> > > > +  return tune_param->overlap_op_by_pieces;
> > > > +}
> > > > +
> > > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> > > >
> > > >  static bool
> > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> > > >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> > > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> > > >
> > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > > > +
> > > >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> > > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> > > >
> > > > --
> > > > 2.31.1
> > > >

Reply via email to