Th 11/14/2017 17:48, James Greenhalgh wrote: > On Tue, Nov 14, 2017 at 04:05:12PM +0000, Tamar Christina wrote: > > Hi James, > > > > I have split off the aarch64 bit off from the generic parts and processed > > your feedback. > > > > Attached is the reworked patch. > > > > Ok for Tunk? > > Thanks for the respin, I'm a bit confused by this comment.
Sorry, the comment was a bit nonsensical. I update the last part but didn't re-read the comment as a whole. I've respun the patch again. Thanks, Tamar > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > > index > > e89c8156976cecf200cd67c1e938c8156c1240c4..bc197cc973a2bbc2c263809c7e8ccfbc86309c26 > > 100644 > > --- a/gcc/config/aarch64/aarch64.c > > +++ b/gcc/config/aarch64/aarch64.c > > @@ -13974,6 +13974,28 @@ aarch64_expand_movmem (rtx *operands) > > base = copy_to_mode_reg (Pmode, XEXP (src, 0)); > > src = adjust_automodify_address (src, VOIDmode, base, 0); > > > > + /* Optimize routines for MEM to REG copies. > > + This particular function only handles copying to two > > + destination types: 1) a regular register and 2) the stack. > > + When writing to a regular register we may end up writting too much in > > cases > > + where the register already contains a live value or when no data > > padding is > > + happening so we disallow regular registers to use this new code path. > > */ > > I'm struggling to understand when you'd end up with a struct held in > a partial register going through this code path. Maybe the restriction > makes sense, can you write a testcase, or show some sample code where > this goes wrong (or simplify the comment). > > Thanks, > James > --
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e89c8156976cecf200cd67c1e938c8156c1240c4..2597ec70ff2c49c8276622d3f9d6fe394a1f80c1 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13974,6 +13974,26 @@ aarch64_expand_movmem (rtx *operands) base = copy_to_mode_reg (Pmode, XEXP (src, 0)); src = adjust_automodify_address (src, VOIDmode, base, 0); + /* Optimize routines for MEM to REG copies. + This particular function only handles copying to two + destination types: 1) a regular register and 2) the stack and only when + the amount to copy fits in less than 8 bytes. */ + if (n < 8 && !REG_P (XEXP (operands[0], 0))) + { + machine_mode dest_mode + = smallest_mode_for_size (n * BITS_PER_UNIT, MODE_INT); + rtx result = gen_reg_rtx (dest_mode); + emit_insn (gen_move_insn (result, GEN_INT (0))); + + src = adjust_address (src, dest_mode, 0); + emit_insn (gen_move_insn (result, src)); + src = aarch64_progress_pointer (src); + + dst = adjust_address (dst, dest_mode, 0); + emit_insn (gen_move_insn (dst, result)); + return true; + } + /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a 1-byte chunk. */ if (n < 4) diff --git a/gcc/testsuite/gcc.target/aarch64/structs.c b/gcc/testsuite/gcc.target/aarch64/structs.c new file mode 100644 index 0000000000000000000000000000000000000000..2be8fae9479500cdbd5f22c6ab4f6adcdd7fcbce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/structs.c @@ -0,0 +1,226 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ +/* Copyright 1996, 1999, 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. + + Please email any bugs, comments, and/or additions to this file to: + bug-...@prep.ai.mit.edu */ + +struct struct1 { char a;}; +struct struct2 { char a, b;}; +struct struct3 { char a, b, c; }; +struct struct4 { char a, b, c, d; }; +struct struct5 { char a, b, c, d, e; }; +struct struct6 { char a, b, c, d, e, f; }; +struct struct7 { char a, b, c, d, e, f, g; }; +struct struct8 { char a, b, c, d, e, f, g, h; }; +struct struct9 { char a, b, c, d, e, f, g, h, i; }; +struct struct10 { char a, b, c, d, e, f, g, h, i, j; }; +struct struct11 { char a, b, c, d, e, f, g, h, i, j, k; }; +struct struct12 { char a, b, c, d, e, f, g, h, i, j, k, l; }; +struct struct16 { char a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p; }; + +struct struct1 foo1 = {'1'}, L1; +struct struct2 foo2 = { 'a', 'b'}, L2; +struct struct3 foo3 = { 'A', 'B', 'C'}, L3; +struct struct4 foo4 = {'1', '2', '3', '4'}, L4; +struct struct5 foo5 = {'a', 'b', 'c', 'd', 'e'}, L5; +struct struct6 foo6 = {'A', 'B', 'C', 'D', 'E', 'F'}, L6; +struct struct7 foo7 = {'1', '2', '3', '4', '5', '6', '7'}, L7; +struct struct8 foo8 = {'1', '2', '3', '4', '5', '6', '7', '8'}, L8; +struct struct9 foo9 = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}, L9; +struct struct10 foo10 = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'}, L10; +struct struct11 foo11 = { + '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B'}, L11; +struct struct12 foo12 = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L'}, L12; +struct struct16 foo16 = { + 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p'}, L16; + +struct struct1 fun1() +{ + return foo1; +} +struct struct2 fun2() +{ + return foo2; +} +struct struct3 fun3() +{ + return foo3; +} +struct struct4 fun4() +{ + return foo4; +} +struct struct5 fun5() +{ + return foo5; +} +struct struct6 fun6() +{ + return foo6; +} +struct struct7 fun7() +{ + return foo7; +} +struct struct8 fun8() +{ + return foo8; +} +struct struct9 fun9() +{ + return foo9; +} +struct struct10 fun10() +{ + return foo10; +} +struct struct11 fun11() +{ + return foo11; +} +struct struct12 fun12() +{ + return foo12; +} +struct struct16 fun16() +{ + return foo16; +} + +#ifdef PROTOTYPES +void Fun1(struct struct1 foo1) +#else +void Fun1(foo1) + struct struct1 foo1; +#endif +{ + L1 = foo1; +} +#ifdef PROTOTYPES +void Fun2(struct struct2 foo2) +#else +void Fun2(foo2) + struct struct2 foo2; +#endif +{ + L2 = foo2; +} +#ifdef PROTOTYPES +void Fun3(struct struct3 foo3) +#else +void Fun3(foo3) + struct struct3 foo3; +#endif +{ + L3 = foo3; +} +#ifdef PROTOTYPES +void Fun4(struct struct4 foo4) +#else +void Fun4(foo4) + struct struct4 foo4; +#endif +{ + L4 = foo4; +} +#ifdef PROTOTYPES +void Fun5(struct struct5 foo5) +#else +void Fun5(foo5) + struct struct5 foo5; +#endif +{ + L5 = foo5; +} +#ifdef PROTOTYPES +void Fun6(struct struct6 foo6) +#else +void Fun6(foo6) + struct struct6 foo6; +#endif +{ + L6 = foo6; +} +#ifdef PROTOTYPES +void Fun7(struct struct7 foo7) +#else +void Fun7(foo7) + struct struct7 foo7; +#endif +{ + L7 = foo7; +} +#ifdef PROTOTYPES +void Fun8(struct struct8 foo8) +#else +void Fun8(foo8) + struct struct8 foo8; +#endif +{ + L8 = foo8; +} +#ifdef PROTOTYPES +void Fun9(struct struct9 foo9) +#else +void Fun9(foo9) + struct struct9 foo9; +#endif +{ + L9 = foo9; +} +#ifdef PROTOTYPES +void Fun10(struct struct10 foo10) +#else +void Fun10(foo10) + struct struct10 foo10; +#endif +{ + L10 = foo10; +} +#ifdef PROTOTYPES +void Fun11(struct struct11 foo11) +#else +void Fun11(foo11) + struct struct11 foo11; +#endif +{ + L11 = foo11; +} +#ifdef PROTOTYPES +void Fun12(struct struct12 foo12) +#else +void Fun12(foo12) + struct struct12 foo12; +#endif +{ + L12 = foo12; +} +#ifdef PROTOTYPES +void Fun16(struct struct16 foo16) +#else +void Fun16(foo16) + struct struct16 foo16; +#endif +{ + L16 = foo16; +} + +/* { dg-final { scan-assembler-not {bfi} } } */ +/* { dg-final { scan-assembler-times {bfx} 5 } } */