The RISC-V cpymemsi expansion is called whenever the by-pieces infrastructure does not take care of the builtin expansion. Currently, the by-pieces infrastructure handles e.g. memcpy() with n <= 24 bytes. The code emitted by the by-pieces infrastructure performs unaligned accesses if the target's riscv_slow_unaligned_access_p is false (and n is not 1).
If n > 24, then the RISC-V cpymemsi expansion is called, which is implemented in riscv_expand_block_move(). The current implementation does not check riscv_slow_unaligned_access_p and never emits unaligned accesses. Since by-pieces emits unaligned accesses, it is reasonable to implement the same behaviour in the cpymemsi expansion, which is what this patch does. The patch checks riscv_slow_unaligned_access_p on entry and sets the allowed alignment accordingly. This alignment is then propagated down to the routines that emit the actual instructions. Without the patch, a memcpy() with n == 25 will be expanded only if the given pointers are aligned. With the patch, unaligned pointers are accepted as well if riscv_slow_unaligned_access_p is false. gcc/ChangeLog: * config/riscv/riscv.c (riscv_block_move_straight): Add parameter align. (riscv_adjust_block_mem): Replace parameter length by parameter align. (riscv_block_move_loop): Add parameter align. (riscv_expand_block_move): Set alignment properly if the target has fast unaligned access. gcc/testsuite/ChangeLog: * gcc.target/riscv/builtins-strict-align.c: New test. * gcc.target/riscv/builtins-unaligned-1.c: New test. * gcc.target/riscv/builtins-unaligned-2.c: New test. * gcc.target/riscv/builtins-unaligned-3.c: New test. * gcc.target/riscv/builtins-unaligned-4.c: New test. * gcc.target/riscv/builtins.h: New test. 
Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> --- gcc/config/riscv/riscv.c | 53 +++++++++++-------- .../gcc.target/riscv/builtins-strict-align.c | 13 +++++ .../gcc.target/riscv/builtins-unaligned-1.c | 15 ++++++ .../gcc.target/riscv/builtins-unaligned-2.c | 15 ++++++ .../gcc.target/riscv/builtins-unaligned-3.c | 15 ++++++ .../gcc.target/riscv/builtins-unaligned-4.c | 15 ++++++ gcc/testsuite/gcc.target/riscv/builtins.h | 10 ++++ 7 files changed, 115 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index 576960bb37c..0596a9ff1b6 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr) return addr; } -/* Emit straight-line code to move LENGTH bytes from SRC to DEST. +/* Emit straight-line code to move LENGTH bytes from SRC to DEST + with accesses that are ALIGN bytes aligned. Assume that the areas do not overlap. 
*/ static void -riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) +riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT align) { unsigned HOST_WIDE_INT offset, delta; unsigned HOST_WIDE_INT bits; @@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) enum machine_mode mode; rtx *regs; - bits = MAX (BITS_PER_UNIT, - MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); + bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align)); mode = mode_for_size (bits, MODE_INT, 0).require (); delta = bits / BITS_PER_UNIT; @@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) { src = adjust_address (src, BLKmode, offset); dest = adjust_address (dest, BLKmode, offset); - move_by_pieces (dest, src, length - offset, - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); + move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN); } } /* Helper function for doing a loop-based block operation on memory - reference MEM. Each iteration of the loop will operate on LENGTH - bytes of MEM. + reference MEM. Create a new base register for use within the loop and point it to the start of MEM. Create a new memory reference that uses this - register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ + register and has an alignment of ALIGN. Store them in *LOOP_REG + and *LOOP_MEM respectively. */ static void -riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, +riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align, rtx *loop_reg, rtx *loop_mem) { *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); @@ -3233,15 +3233,17 @@ riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, /* Although the new mem does not refer to a known location, it does keep up to LENGTH bytes of alignment. 
*/ *loop_mem = change_address (mem, BLKmode, *loop_reg); - set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); + set_mem_align (*loop_mem, align); } /* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER - bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that - the memory regions do not overlap. */ + bytes at a time. LENGTH must be at least BYTES_PER_ITER. The alignment + of the access can be set by ALIGN. Assume that the memory regions do not + overlap. */ static void riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT align, unsigned HOST_WIDE_INT bytes_per_iter) { rtx label, src_reg, dest_reg, final_src, test; @@ -3251,8 +3253,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, length -= leftover; /* Create registers and memory references for use within the loop. */ - riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); - riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); + riscv_adjust_block_mem (src, align, &src_reg, &src); + riscv_adjust_block_mem (dest, align, &dest_reg, &dest); /* Calculate the value that SRC_REG should have after the last iteration of the loop. */ @@ -3264,7 +3266,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, emit_label (label); /* Emit the loop body. */ - riscv_block_move_straight (dest, src, bytes_per_iter); + riscv_block_move_straight (dest, src, bytes_per_iter, align); /* Move on to the next block. */ riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); @@ -3276,7 +3278,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, /* Mop up any left-over bytes. 
*/ if (leftover) - riscv_block_move_straight (dest, src, leftover); + riscv_block_move_straight (dest, src, leftover, align); else emit_insn(gen_nop ()); } @@ -3292,8 +3294,17 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); unsigned HOST_WIDE_INT factor, align; - align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); - factor = BITS_PER_WORD / align; + if (riscv_slow_unaligned_access_p) + { + align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); + factor = BITS_PER_WORD / align; + } + else + { + /* Assume data to be aligned. */ + align = hwi_length * BITS_PER_UNIT; + factor = 1; + } if (optimize_function_for_size_p (cfun) && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false)) @@ -3301,7 +3312,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)) { - riscv_block_move_straight (dest, src, INTVAL (length)); + riscv_block_move_straight (dest, src, hwi_length, align); return true; } else if (optimize && align >= BITS_PER_WORD) @@ -3321,7 +3332,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) iter_words = i; } - riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD); + riscv_block_move_loop (dest, src, bytes, align, iter_words * UNITS_PER_WORD); return true; } } diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c new file mode 100644 index 00000000000..6f7b1f324de --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c @@ -0,0 +1,13 @@ +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 -mstrict-align" } */ +/* { dg-do compile } */ + +#include "builtins.h" + +DO_MEMCPY_N(12) + +/* { dg-final { scan-assembler-times "lbu" 12 } } */ +/* { dg-final { scan-assembler-times "sb" 12 } } */ +/* { dg-final { scan-assembler-not "lw" } } */ +/* { dg-final { scan-assembler-not "sw" } } 
*/ +/* { dg-final { scan-assembler-not "ld" } } */ +/* { dg-final { scan-assembler-not "sd" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c new file mode 100644 index 00000000000..f97d60a35d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ +/* { dg-do compile } */ + +#include "builtins.h" + +DO_MEMCPY_N(7) + +/* { dg-final { scan-assembler-not "ld" } } */ +/* { dg-final { scan-assembler-not "sd" } } */ +/* { dg-final { scan-assembler-times "lw" 1 } } */ +/* { dg-final { scan-assembler-times "sw" 1 } } */ +/* { dg-final { scan-assembler-times "lh" 1 } } */ +/* { dg-final { scan-assembler-times "sh" 1 } } */ +/* { dg-final { scan-assembler-times "lbu" 1 } } */ +/* { dg-final { scan-assembler-times "sb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c new file mode 100644 index 00000000000..b373651d241 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ +/* { dg-do compile } */ + +#include "builtins.h" + +DO_MEMCPY_N(8) + +/* { dg-final { scan-assembler-times "ld" 1 } } */ +/* { dg-final { scan-assembler-times "sd" 1 } } */ +/* { dg-final { scan-assembler-not "lw" } } */ +/* { dg-final { scan-assembler-not "sw" } } */ +/* { dg-final { scan-assembler-not "lh" } } */ +/* { dg-final { scan-assembler-not "sh" } } */ +/* { dg-final { scan-assembler-not "lbu" } } */ +/* { dg-final { scan-assembler-not "sb" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c new file mode 100644 index 00000000000..3f4a6b9630b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c @@ -0,0 +1,15 @@ +/* { dg-options 
"-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ +/* { dg-do compile } */ + +#include "builtins.h" + +DO_MEMCPY_N(31) + +/* { dg-final { scan-assembler-times "ld" 3 } } */ +/* { dg-final { scan-assembler-times "sd" 3 } } */ +/* { dg-final { scan-assembler-times "lw" 1 } } */ +/* { dg-final { scan-assembler-times "sw" 1 } } */ +/* { dg-final { scan-assembler-times "lh" 1 } } */ +/* { dg-final { scan-assembler-times "sh" 1 } } */ +/* { dg-final { scan-assembler-times "lbu" 1 } } */ +/* { dg-final { scan-assembler-times "sb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c new file mode 100644 index 00000000000..26fcb7a71a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ +/* { dg-do compile } */ + +#include "builtins.h" + +DO_MEMCPY_N(63) + +/* { dg-final { scan-assembler-times "ld" 7 } } */ +/* { dg-final { scan-assembler-times "sd" 7 } } */ +/* { dg-final { scan-assembler-times "lw" 1 } } */ +/* { dg-final { scan-assembler-times "sw" 1 } } */ +/* { dg-final { scan-assembler-times "lh" 1 } } */ +/* { dg-final { scan-assembler-times "sh" 1 } } */ +/* { dg-final { scan-assembler-times "lbu" 1 } } */ +/* { dg-final { scan-assembler-times "sb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h b/gcc/testsuite/gcc.target/riscv/builtins.h new file mode 100644 index 00000000000..5cad5fe194b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/builtins.h @@ -0,0 +1,10 @@ +#ifndef BUILTINS_H +#define BUILTINS_H + +#define DO_MEMCPY_N(N) \ +void do_memcpy_##N (void *d, const void *s) \ +{ \ + __builtin_memcpy (d, s, N); \ +} + +#endif /* BUILTINS_H */ -- 2.31.1