Hi All, On AArch64 we have integer modes larger than TImode, and while we can generate moves for these they're not as efficient.
So instead make sure we limit the maximum we can copy to TImode. This means copying a 16 byte struct will issue 1 TImode copy, which will be done using a single STP as we expect but a CImode-sized copy won't issue CImode operations. Bootstrapped and regtested on aarch64-none-linux-gnu and no issues. Crosstested aarch64_be-none-elf and no issues. Ok for trunk? Thanks, Tamar gcc/ 2018-08-13 Tamar Christina <tamar.christ...@arm.com> * config/aarch64/aarch64.c (aarch64_expand_movmem): Set TImode max. gcc/testsuite/ 2018-08-13 Tamar Christina <tamar.christ...@arm.com> * gcc.target/aarch64/large_struct_copy_2.c: Add assembler scan. --
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1e8d8104c066a265120ab776f7ab5a959d3512b6..cdd8bca98f8c50a804986510144db9ecf911bf1e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -15927,13 +15927,17 @@ aarch64_expand_movmem (rtx *operands) /* Convert n to bits to make the rest of the code simpler. */ n = n * BITS_PER_UNIT; + /* Maximum amount to copy in one go. The AArch64 back-end has integer modes + larger than TImode, but we should not use them for loads/stores here. */ + const int copy_limit = GET_MODE_BITSIZE (TImode); + while (n > 0) { /* Find the largest mode in which to do the copy in without over reading or writing. */ opt_scalar_int_mode mode_iter; FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) - if (GET_MODE_BITSIZE (mode_iter.require ()) <= n) + if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit)) cur_mode = mode_iter.require (); gcc_assert (cur_mode != BLKmode); diff --git a/gcc/testsuite/gcc.target/aarch64/large_struct_copy_2.c b/gcc/testsuite/gcc.target/aarch64/large_struct_copy_2.c new file mode 100644 index 0000000000000000000000000000000000000000..8380ce008e7ffd30b6d21d89dc5ff3a9fd395e9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/large_struct_copy_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +struct S0 { + signed f1; + long f2; + unsigned f3; + unsigned f4; + unsigned f5; +} a; +struct S2 { + long f0; + int f2; + struct S0 f3; +}; + +void fn1 () { + struct S2 b = {0, 1, 7, 4073709551611, 4, 8, 7}; + a = b.f3; +} + +/* { dg-final { scan-assembler-times {ldp\s+x[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {stp\s+x[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-not {ld[1-3]} } } */