On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.ch...@gmail.com> wrote: > > Sounds like we could just use !tune_param->slow_unaligned_access for > TARGET_OVERLAP_OP_BY_PIECES_P? > since it improves both performance and code size if we have cheap > unaligned accesses.
Fine for me as well. I'll prepare a v2 that enables overlap_op_by_pieces if slow_unaligned_access==false. > > On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.ch...@gmail.com> wrote: > > > > > > It's my first time seeing this hook :p Did you mind describing when we > > > need to set it to true? > > > I mean when a CPU has some feature then we can/should set it to true? > > > > The by-pieces infrastructure allows inlining builtins quite well and > > uses slow_unaligned_access and overlap_op_by_pieces to tune the > > emitted instruction sequence. > > > > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce > > the number of instructions (emitted by by-pieces for e.g. memset) at the > > cost > > of overlapping memory accesses. > > > > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses): > > Without overlap_op_by_pieces you will get: > > 8e: 00053023 sd zero,0(a0) > > 92: 00052423 sw zero,8(a0) > > 96: 00051623 sh zero,12(a0) > > 9a: 00050723 sb zero,14(a0) > > With overlap_op_by_pieces you will get: > > 7e: 00053023 sd zero,0(a0) > > 82: 000533a3 sd zero,7(a0) > > > > BR > > Christoph > > > > > > > > > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > This patch adds the field overlap_op_by_pieces to the struct > > > > riscv_tune_param, which allows enabling the overlap_op_by_pieces > > > > feature of the by-pieces infrastructure. > > > > > > > > gcc/ChangeLog: > > > > > > > > * config/riscv/riscv.c (struct riscv_tune_param): New field. > > > > (riscv_overlap_op_by_pieces): New function. > > > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to > > > > riscv_overlap_op_by_pieces. 
> > > > > > > > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> > > > > --- > > > > gcc/config/riscv/riscv.c | 14 ++++++++++++++ > > > > 1 file changed, 14 insertions(+) > > > > > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > > > > index 576960bb37c..824e930ef05 100644 > > > > --- a/gcc/config/riscv/riscv.c > > > > +++ b/gcc/config/riscv/riscv.c > > > > @@ -220,6 +220,7 @@ struct riscv_tune_param > > > > unsigned short branch_cost; > > > > unsigned short memory_cost; > > > > bool slow_unaligned_access; > > > > + bool overlap_op_by_pieces; > > > > }; > > > > > > > > /* Information about one micro-arch we know about. */ > > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param > > > > rocket_tune_info = { > > > > 3, /* branch_cost */ > > > > 5, /* memory_cost */ > > > > true, /* > > > > slow_unaligned_access */ > > > > + false, /* overlap_op_by_pieces > > > > */ > > > > }; > > > > > > > > /* Costs to use when optimizing for Sifive 7 Series. */ > > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param > > > > sifive_7_tune_info = { > > > > 4, /* branch_cost */ > > > > 3, /* memory_cost */ > > > > true, /* > > > > slow_unaligned_access */ > > > > + false, /* overlap_op_by_pieces > > > > */ > > > > }; > > > > > > > > /* Costs to use when optimizing for T-HEAD c906. */ > > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param > > > > thead_c906_tune_info = { > > > > 3, /* branch_cost */ > > > > 5, /* memory_cost */ > > > > false, /* slow_unaligned_access */ > > > > + false, /* overlap_op_by_pieces > > > > */ > > > > }; > > > > > > > > /* Costs to use when optimizing for size. 
*/ > > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param > > > > optimize_size_tune_info = { > > > > 1, /* branch_cost */ > > > > 2, /* memory_cost */ > > > > false, /* > > > > slow_unaligned_access */ > > > > + false, /* overlap_op_by_pieces > > > > */ > > > > }; > > > > > > > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, > > > > bool *); > > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, > > > > unsigned int) > > > > return riscv_slow_unaligned_access_p; > > > > } > > > > > > > > +static bool > > > > +riscv_overlap_op_by_pieces (void) > > > > +{ > > > > + return tune_param->overlap_op_by_pieces; > > > > +} > > > > + > > > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ > > > > > > > > static bool > > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void) > > > > #undef TARGET_SLOW_UNALIGNED_ACCESS > > > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access > > > > > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P > > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces > > > > + > > > > #undef TARGET_SECONDARY_MEMORY_NEEDED > > > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed > > > > > > > > -- > > > > 2.31.1 > > > >