On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.ch...@gmail.com> wrote: > > It's my first time seeing this hook :p Would you mind describing when we > need to set it to true? > I mean, if a CPU has some particular feature, can/should we then set it to true?
The by-pieces infrastructure inlines builtins quite well and uses slow_unaligned_access and overlap_op_by_pieces to tune the emitted instruction sequence. If unaligned accesses are fast, overlap_op_by_pieces can reduce the number of instructions (emitted by by-pieces for e.g. memset) at the cost of overlapping memory accesses. For example, suppose you want to clear 15 bytes with memset (and unaligned accesses are cheap). Without overlap_op_by_pieces you will get: 8e: 00053023 sd zero,0(a0) 92: 00052423 sw zero,8(a0) 96: 00051623 sh zero,12(a0) 9a: 00050723 sb zero,14(a0) With overlap_op_by_pieces you will get (two 8-byte stores that overlap in byte 7): 7e: 00053023 sd zero,0(a0) 82: 000533a3 sd zero,7(a0) BR Christoph > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > This patch adds the field overlap_op_by_pieces to the struct > > riscv_tune_param, which allows to enable the overlap_op_by_pieces > > feature of the by-pieces infrastructure. > > > > gcc/ChangeLog: > > > > * config/riscv/riscv.c (struct riscv_tune_param): New field. > > (riscv_overlap_op_by_pieces): New function. > > (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to > > riscv_overlap_op_by_pieces. > > > > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> > > --- > > gcc/config/riscv/riscv.c | 14 ++++++++++++++ > > 1 file changed, 14 insertions(+) > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > > index 576960bb37c..824e930ef05 100644 > > --- a/gcc/config/riscv/riscv.c > > +++ b/gcc/config/riscv/riscv.c > > @@ -220,6 +220,7 @@ struct riscv_tune_param > > unsigned short branch_cost; > > unsigned short memory_cost; > > bool slow_unaligned_access; > > + bool overlap_op_by_pieces; > > }; > > > > /* Information about one micro-arch we know about. 
*/ > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = > > { > > 3, /* branch_cost */ > > 5, /* memory_cost */ > > true, /* > > slow_unaligned_access */ > > + false, /* overlap_op_by_pieces */ > > }; > > > > /* Costs to use when optimizing for Sifive 7 Series. */ > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info > > = { > > 4, /* branch_cost */ > > 3, /* memory_cost */ > > true, /* > > slow_unaligned_access */ > > + false, /* overlap_op_by_pieces */ > > }; > > > > /* Costs to use when optimizing for T-HEAD c906. */ > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param > > thead_c906_tune_info = { > > 3, /* branch_cost */ > > 5, /* memory_cost */ > > false, /* slow_unaligned_access */ > > + false, /* overlap_op_by_pieces */ > > }; > > > > /* Costs to use when optimizing for size. */ > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param > > optimize_size_tune_info = { > > 1, /* branch_cost */ > > 2, /* memory_cost */ > > false, /* slow_unaligned_access */ > > + false, /* overlap_op_by_pieces */ > > }; > > > > static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool > > *); > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned > > int) > > return riscv_slow_unaligned_access_p; > > } > > > > +static bool > > +riscv_overlap_op_by_pieces (void) > > +{ > > + return tune_param->overlap_op_by_pieces; > > +} > > + > > /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ > > > > static bool > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void) > > #undef TARGET_SLOW_UNALIGNED_ACCESS > > #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces > > + > > #undef TARGET_SECONDARY_MEMORY_NEEDED > > #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed > > > > -- > > 2.31.1 > >