Ping.
On Thu, Aug 5, 2021 at 11:11 AM Christoph Müllner <cmuell...@gcc.gnu.org> wrote: > > Ping. > > On Thu, Jul 29, 2021 at 4:33 PM Christoph Muellner > <cmuell...@gcc.gnu.org> wrote: > > > > The RISC-V cpymemsi expansion is called, whenever the by-pieces > > infrastructure will not be taking care of the builtin expansion. > > Currently, that's the case for e.g. memcpy() with n <= 24 bytes. > > The by-pieces infrastructure emits code that > > performs unaligned accesses if the target's > > riscv_slow_unaligned_access_p is false (and n is not 1). > > > > If n > 24, then the RISC-V cpymemsi expansion is called, which is > > implemented in riscv_expand_block_move(). The current implementation > > does not check riscv_slow_unaligned_access_p and never emits unaligned > > accesses. > > > > Since by-pieces emits unaligned accesses, it is reasonable to implement > > the same behaviour in the cpymemsi expansion. And that's what this patch > > is doing. > > > > The patch checks riscv_slow_unaligned_access_p at the entry and sets > > the allowed alignment accordingly. This alignment is then propagated > > down to the routines that emit the actual instructions. > > > > Without the patch a memcpy() with n==25 will be expanded only > > if the given pointers are aligned. With the patch also unaligned > > pointers are accepted if riscv_slow_unaligned_access_p is false. > > > > gcc/ChangeLog: > > > > * config/riscv/riscv.c (riscv_block_move_straight): Add > > parameter align. > > (riscv_adjust_block_mem): Replace parameter length by parameter > > align. > > (riscv_block_move_loop): Add parameter align. > > (riscv_expand_block_move): Set alignment properly if the target > > has fast unaligned access. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/riscv/builtins-strict-align.c: New test. > > * gcc.target/riscv/builtins-unaligned-1.c: New test. > > * gcc.target/riscv/builtins-unaligned-2.c: New test. > > * gcc.target/riscv/builtins-unaligned-3.c: New test.
> > * gcc.target/riscv/builtins-unaligned-4.c: New test. > > * gcc.target/riscv/builtins.h: New test. > > > > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> > > --- > > gcc/config/riscv/riscv.c | 53 +++++++++++-------- > > .../gcc.target/riscv/builtins-strict-align.c | 13 +++++ > > .../gcc.target/riscv/builtins-unaligned-1.c | 15 ++++++ > > .../gcc.target/riscv/builtins-unaligned-2.c | 15 ++++++ > > .../gcc.target/riscv/builtins-unaligned-3.c | 15 ++++++ > > .../gcc.target/riscv/builtins-unaligned-4.c | 15 ++++++ > > gcc/testsuite/gcc.target/riscv/builtins.h | 10 ++++ > > 7 files changed, 115 insertions(+), 21 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > > index 576960bb37c..0596a9ff1b6 100644 > > --- a/gcc/config/riscv/riscv.c > > +++ b/gcc/config/riscv/riscv.c > > @@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr) > > return addr; > > } > > > > -/* Emit straight-line code to move LENGTH bytes from SRC to DEST. > > +/* Emit straight-line code to move LENGTH bytes from SRC to DEST > > + with accesses that are ALIGN bytes aligned. > > Assume that the areas do not overlap. 
*/ > > > > static void > > -riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT > > length) > > +riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT > > length, > > + unsigned HOST_WIDE_INT align) > > { > > unsigned HOST_WIDE_INT offset, delta; > > unsigned HOST_WIDE_INT bits; > > @@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, > > unsigned HOST_WIDE_INT length) > > enum machine_mode mode; > > rtx *regs; > > > > - bits = MAX (BITS_PER_UNIT, > > - MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); > > + bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align)); > > > > mode = mode_for_size (bits, MODE_INT, 0).require (); > > delta = bits / BITS_PER_UNIT; > > @@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, > > unsigned HOST_WIDE_INT length) > > { > > src = adjust_address (src, BLKmode, offset); > > dest = adjust_address (dest, BLKmode, offset); > > - move_by_pieces (dest, src, length - offset, > > - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), > > RETURN_BEGIN); > > + move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN); > > } > > } > > > > /* Helper function for doing a loop-based block operation on memory > > - reference MEM. Each iteration of the loop will operate on LENGTH > > - bytes of MEM. > > + reference MEM. > > > > Create a new base register for use within the loop and point it to > > the start of MEM. Create a new memory reference that uses this > > - register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ > > + register and has an alignment of ALIGN. Store them in *LOOP_REG > > + and *LOOP_MEM respectively. 
*/ > > > > static void > > -riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, > > +riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align, > > rtx *loop_reg, rtx *loop_mem) > > { > > *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); > > @@ -3233,15 +3233,17 @@ riscv_adjust_block_mem (rtx mem, unsigned > > HOST_WIDE_INT length, > > /* Although the new mem does not refer to a known location, > > it does keep up to LENGTH bytes of alignment. */ > > *loop_mem = change_address (mem, BLKmode, *loop_reg); > > - set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); > > + set_mem_align (*loop_mem, align); > > } > > > > /* Move LENGTH bytes from SRC to DEST using a loop that moves > > BYTES_PER_ITER > > - bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that > > - the memory regions do not overlap. */ > > + bytes at a time. LENGTH must be at least BYTES_PER_ITER. The alignment > > + of the access can be set by ALIGN. Assume that the memory regions do > > not > > + overlap. */ > > > > static void > > riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, > > + unsigned HOST_WIDE_INT align, > > unsigned HOST_WIDE_INT bytes_per_iter) > > { > > rtx label, src_reg, dest_reg, final_src, test; > > @@ -3251,8 +3253,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > > HOST_WIDE_INT length, > > length -= leftover; > > > > /* Create registers and memory references for use within the loop. */ > > - riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); > > - riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); > > + riscv_adjust_block_mem (src, align, &src_reg, &src); > > + riscv_adjust_block_mem (dest, align, &dest_reg, &dest); > > > > /* Calculate the value that SRC_REG should have after the last iteration > > of the loop. */ > > @@ -3264,7 +3266,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > > HOST_WIDE_INT length, > > emit_label (label); > > > > /* Emit the loop body. 
*/ > > - riscv_block_move_straight (dest, src, bytes_per_iter); > > + riscv_block_move_straight (dest, src, bytes_per_iter, align); > > > > /* Move on to the next block. */ > > riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, > > bytes_per_iter)); > > @@ -3276,7 +3278,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > > HOST_WIDE_INT length, > > > > /* Mop up any left-over bytes. */ > > if (leftover) > > - riscv_block_move_straight (dest, src, leftover); > > + riscv_block_move_straight (dest, src, leftover, align); > > else > > emit_insn(gen_nop ()); > > } > > @@ -3292,8 +3294,17 @@ riscv_expand_block_move (rtx dest, rtx src, rtx > > length) > > unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); > > unsigned HOST_WIDE_INT factor, align; > > > > - align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); > > - factor = BITS_PER_WORD / align; > > + if (riscv_slow_unaligned_access_p) > > + { > > + align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), > > BITS_PER_WORD); > > + factor = BITS_PER_WORD / align; > > + } > > + else > > + { > > + /* Assume data to be aligned. 
*/ > > + align = hwi_length * BITS_PER_UNIT; > > + factor = 1; > > + } > > > > if (optimize_function_for_size_p (cfun) > > && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false)) > > @@ -3301,7 +3312,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx > > length) > > > > if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)) > > { > > - riscv_block_move_straight (dest, src, INTVAL (length)); > > + riscv_block_move_straight (dest, src, hwi_length, align); > > return true; > > } > > else if (optimize && align >= BITS_PER_WORD) > > @@ -3321,7 +3332,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx > > length) > > iter_words = i; > > } > > > > - riscv_block_move_loop (dest, src, bytes, iter_words * > > UNITS_PER_WORD); > > + riscv_block_move_loop (dest, src, bytes, align, iter_words * > > UNITS_PER_WORD); > > return true; > > } > > } > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > > b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > > new file mode 100644 > > index 00000000000..6f7b1f324de > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > > @@ -0,0 +1,13 @@ > > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 > > -mstrict-align" } */ > > +/* { dg-do compile } */ > > + > > +#include "builtins.h" > > + > > +DO_MEMCPY_N(12) > > + > > +/* { dg-final { scan-assembler-times "lbu" 12 } } */ > > +/* { dg-final { scan-assembler-times "sb" 12 } } */ > > +/* { dg-final { scan-assembler-not "lw" } } */ > > +/* { dg-final { scan-assembler-not "sw" } } */ > > +/* { dg-final { scan-assembler-not "ld" } } */ > > +/* { dg-final { scan-assembler-not "sd" } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > > new file mode 100644 > > index 00000000000..f97d60a35d4 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > > @@ -0,0 +1,15 @@ > > +/* { dg-options "-O2 
-mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > > +/* { dg-do compile } */ > > + > > +#include "builtins.h" > > + > > +DO_MEMCPY_N(7) > > + > > +/* { dg-final { scan-assembler-not "ld" } } */ > > +/* { dg-final { scan-assembler-not "sd" } } */ > > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > > new file mode 100644 > > index 00000000000..b373651d241 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > > @@ -0,0 +1,15 @@ > > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > > +/* { dg-do compile } */ > > + > > +#include "builtins.h" > > + > > +DO_MEMCPY_N(8) > > + > > +/* { dg-final { scan-assembler-times "ld" 1 } } */ > > +/* { dg-final { scan-assembler-times "sd" 1 } } */ > > +/* { dg-final { scan-assembler-not "lw" } } */ > > +/* { dg-final { scan-assembler-not "sw" } } */ > > +/* { dg-final { scan-assembler-not "lh" } } */ > > +/* { dg-final { scan-assembler-not "sh" } } */ > > +/* { dg-final { scan-assembler-not "lbu" } } */ > > +/* { dg-final { scan-assembler-not "sb" } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > > new file mode 100644 > > index 00000000000..3f4a6b9630b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > > @@ -0,0 +1,15 @@ > > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > > +/* { dg-do compile } */ > > + > > +#include "builtins.h" > > + > > +DO_MEMCPY_N(31) > > + > > +/* { dg-final { scan-assembler-times "ld" 3 } } */ > > +/* 
{ dg-final { scan-assembler-times "sd" 3 } } */ > > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > > new file mode 100644 > > index 00000000000..26fcb7a71a7 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > > @@ -0,0 +1,15 @@ > > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > > +/* { dg-do compile } */ > > + > > +#include "builtins.h" > > + > > +DO_MEMCPY_N(63) > > + > > +/* { dg-final { scan-assembler-times "ld" 7 } } */ > > +/* { dg-final { scan-assembler-times "sd" 7 } } */ > > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h > > b/gcc/testsuite/gcc.target/riscv/builtins.h > > new file mode 100644 > > index 00000000000..5cad5fe194b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/builtins.h > > @@ -0,0 +1,10 @@ > > +#ifndef BUILTINS_H > > +#define BUILTINS_H > > + > > +#define DO_MEMCPY_N(N) \ > > +void do_memcpy_##N (void *d, const void *s) \ > > +{ \ > > + __builtin_memcpy (d, s, N); \ > > +} > > + > > +#endif /* BUILTINS_H */ > > -- > > 2.31.1 > >