It sounds like we could just use !tune_param->slow_unaligned_access for TARGET_OVERLAP_OP_BY_PIECES_P, since overlapping by-pieces operations improve both performance and code size when unaligned accesses are cheap.
On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.ch...@gmail.com> wrote: > > > > It's my first time seeing this hook :p Did you mind describing when we > > need to set it to true? > > I mean when a CPU has some feature then we can/should set it to true? > > The by-pieces infrastructure allows to inline builtins quite well and > uses slow_unaligned_access and overlap_op_by_pieces to tune the > emitted instruction sequence. > > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce > the number of instructions (emitted by by-pieces for e.g. memset) for the cost > of overlapping memory accesses. > > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses): > Without overlap_op_by_pieces you will get: > 8e: 00053023 sd zero,0(a0) > 92: 00052423 sw zero,8(a0) > 96: 00051623 sh zero,12(a0) > 9a: 00050723 sb zero,14(a0) > With overlap_op_by_pieces you will get: > 7e: 00053023 sd zero,0(a0) > 82: 000533a3 sd zero,7(a0) > > BR > Christoph > > > > > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > This patch adds the field overlap_op_by_pieces to the struct > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces > > > feature of the by-pieces infrastructure. > > > > > > gcc/ChangeLog: > > > > > > * config/riscv/riscv.c (struct riscv_tune_param): New field. > > > (riscv_overlap_op_by_pieces): New function. > > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to > > > riscv_overlap_op_by_pieces. 
> > > > > > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> > > > --- > > > gcc/config/riscv/riscv.c | 14 ++++++++++++++ > > > 1 file changed, 14 insertions(+) > > > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > > > index 576960bb37c..824e930ef05 100644 > > > --- a/gcc/config/riscv/riscv.c > > > +++ b/gcc/config/riscv/riscv.c > > > @@ -220,6 +220,7 @@ struct riscv_tune_param > > > unsigned short branch_cost; > > > unsigned short memory_cost; > > > bool slow_unaligned_access; > > > + bool overlap_op_by_pieces; > > > }; > > > > > > /* Information about one micro-arch we know about. */ > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info > > > = { > > > 3, /* branch_cost */ > > > 5, /* memory_cost */ > > > true, /* > > > slow_unaligned_access */ > > > + false, /* overlap_op_by_pieces */ > > > }; > > > > > > /* Costs to use when optimizing for Sifive 7 Series. */ > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param > > > sifive_7_tune_info = { > > > 4, /* branch_cost */ > > > 3, /* memory_cost */ > > > true, /* > > > slow_unaligned_access */ > > > + false, /* overlap_op_by_pieces */ > > > }; > > > > > > /* Costs to use when optimizing for T-HEAD c906. */ > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param > > > thead_c906_tune_info = { > > > 3, /* branch_cost */ > > > 5, /* memory_cost */ > > > false, /* slow_unaligned_access */ > > > + false, /* overlap_op_by_pieces */ > > > }; > > > > > > /* Costs to use when optimizing for size. 
*/ > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param > > > optimize_size_tune_info = { > > > 1, /* branch_cost */ > > > 2, /* memory_cost */ > > > false, /* slow_unaligned_access > > > */ > > > + false, /* overlap_op_by_pieces */ > > > }; > > > > > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool > > > *); > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, > > > unsigned int) > > > return riscv_slow_unaligned_access_p; > > > } > > > > > > +static bool > > > +riscv_overlap_op_by_pieces (void) > > > +{ > > > + return tune_param->overlap_op_by_pieces; > > > +} > > > + > > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ > > > > > > static bool > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void) > > > #undef TARGET_SLOW_UNALIGNED_ACCESS > > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access > > > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces > > > + > > > #undef TARGET_SECONDARY_MEMORY_NEEDED > > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed > > > > > > -- > > > 2.31.1 > > >