Hi All,

This patch allows larger bitsizes to be used as copy size
when the target does not have SLOW_UNALIGNED_ACCESS.
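For reference, the fun3 in the assembly below is a plain 3-byte
struct return, reduced from the new testcase in this patch (the
(void) prototype is my shorthand; the testcase uses K&R-style
declarations):

    struct struct3 { char a, b, c; };
    struct struct3 foo3 = { 'A', 'B', 'C' };

    struct struct3 fun3 (void)
    {
      return foo3;  /* 3-byte BLKmode value returned in a register.  */
    }

Before the patch, the following code was generated for fun3: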
fun3:
	adrp	x2, .LANCHOR0
	add	x2, x2, :lo12:.LANCHOR0
	mov	x0, 0
	sub	sp, sp, #16
	ldrh	w1, [x2, 16]
	ldrb	w2, [x2, 18]
	add	sp, sp, 16
	bfi	x0, x1, 0, 8
	ubfx	x1, x1, 8, 8
	bfi	x0, x1, 8, 8
	bfi	x0, x2, 16, 8
	ret

is turned into

fun3:
	adrp	x0, .LANCHOR0
	add	x0, x0, :lo12:.LANCHOR0
	sub	sp, sp, #16
	ldrh	w1, [x0, 16]
	ldrb	w0, [x0, 18]
	strh	w1, [sp, 8]
	strb	w0, [sp, 10]
	ldr	w0, [sp, 8]
	add	sp, sp, 16
	ret

which avoids the bfi's for a simple 3-byte struct copy.

Regression tested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu
with no regressions.

This patch is just split off from the previous combined AArch64
patch and adds a testcase.

I assume Jeff's ACK from
https://gcc.gnu.org/ml/gcc-patches/2017-08/msg01523.html is still
valid as the code did not change.

Thanks,
Tamar

gcc/
2017-11-14  Tamar Christina  <tamar.christ...@arm.com>

	* expr.c (copy_blkmode_to_reg): Fix bitsize for targets
	with fast unaligned access.
	* doc/sourcebuild.texi (no_slow_unalign): New.

gcc/testsuite/
2017-11-14  Tamar Christina  <tamar.christ...@arm.com>

	* gcc.dg/struct-simple.c: New.
	* lib/target-supports.exp
	(check_effective_target_no_slow_unalign): New.

--
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 1646d0a99911aa7b2e66762e5907fbb0454ed00d..1df7a82fbd516b9bf07908bb800e441110b28ca4 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2178,8 +2178,12 @@ Target supports @code{wchar_t} that is compatible with @code{char32_t}.
 @item comdat_group
 Target uses comdat groups.
+
+@item no_slow_unalign
+Target does not have slow unaligned access.
 @end table
+
 @subsubsection Local to tests in @code{gcc.target/i386}
 
 @table @code
diff --git a/gcc/expr.c b/gcc/expr.c
index 2f8432d92ccac17c0a548faf4a16eff0656cef1b..afcea8fef58155d0a899991c10cd485ba8af888d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -2769,7 +2769,9 @@ copy_blkmode_to_reg (machine_mode mode, tree src)
   n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
   dst_words = XALLOCAVEC (rtx, n_regs);
-  bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
+  bitsize = BITS_PER_WORD;
+  if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE (src))))
+    bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
 
   /* Copy the structure BITSIZE bits at a time.  */
   for (bitpos = 0, xbitpos = padding_correction;
diff --git a/gcc/testsuite/gcc.dg/struct-simple.c b/gcc/testsuite/gcc.dg/struct-simple.c
new file mode 100644
index 0000000000000000000000000000000000000000..9f218851d897421b217b0926d29845b6192982fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct-simple.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target no_slow_unalign } */
+/* { dg-additional-options "-fdump-rtl-final" } */
+
+/* Copyright 1996, 1999, 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.
+
+   Please email any bugs, comments, and/or additions to this file to:
+   bug-...@prep.ai.mit.edu  */
+
+#include <stdio.h>
+
+struct struct3 { char a, b, c; };
+struct struct3 foo3 = { 'A', 'B', 'C'}, L3;
+
+struct struct3 fun3()
+{
+  return foo3;
+}
+
+#ifdef PROTOTYPES
+void Fun3(struct struct3 foo3)
+#else
+void Fun3(foo3)
+     struct struct3 foo3;
+#endif
+{
+  L3 = foo3;
+}
+
+int main()
+{
+  struct struct3 x = fun3();
+
+  printf("a:%c, b:%c, c:%c\n", x.a, x.b, x.c);
+}
+
+/* { dg-final { scan-rtl-dump-not {zero_extract:.+\[\s*foo3\s*\]} "final" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b6f9e51c4817cf8235c8e33b14e2763308eb482a..690d8960002a3c38462bbe5524b419c205ba4da9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6037,6 +6037,30 @@ proc check_effective_target_unaligned_stack { } {
     return $et_unaligned_stack_saved
 }
 
+# Return 1 if the target plus current options does not have
+# slow unaligned access.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_no_slow_unalign { } {
+    global et_no_slow_unalign_saved
+    global et_index
+
+    if [info exists et_no_slow_unalign_saved($et_index)] {
+	verbose "check_effective_target_no_slow_unalign: using cached result" 2
+    } else {
+	set et_no_slow_unalign_saved($et_index) 0
+	if { [istarget x86_64-*-*]
+	     || [istarget aarch64*-*-*]
+	   } {
+	    set et_no_slow_unalign_saved($et_index) 1
+	}
+    }
+    verbose "check_effective_target_no_slow_unalign:\
+	     returning $et_no_slow_unalign_saved($et_index)" 2
+    return $et_no_slow_unalign_saved($et_index)
+}
+
 # Return 1 if the target plus current options does not support a vector
 # alignment mechanism, 0 otherwise.
 #
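As a closing note, here is a rough C model of the two fun3 sequences
shown at the top (illustrative only; the names are made up, the byte
layout assumes a little-endian target such as aarch64, and the padding
byte is zeroed here for determinism where the real ldr reads stack
garbage):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    struct s3 { char a, b, c; };

    /* Old sequence: build the return word byte by byte, mirroring
       the bfi/ubfx bit-field insertions.  */
    static uint64_t
    old_style (const struct s3 *p)
    {
      uint64_t r = 0;
      r |= (uint64_t) (unsigned char) p->a;
      r |= (uint64_t) (unsigned char) p->b << 8;
      r |= (uint64_t) (unsigned char) p->c << 16;
      return r;
    }

    /* New sequence: store the struct to a stack slot and reload one
       word (the strh/strb followed by ldr), which is profitable when
       unaligned or partial accesses are fast.  */
    static uint64_t
    new_style (const struct s3 *p)
    {
      uint64_t r = 0;
      memcpy (&r, p, sizeof *p);
      return r;
    }

    int
    main (void)
    {
      struct s3 v = { 'A', 'B', 'C' };
      printf ("old=%llx new=%llx\n",
	      (unsigned long long) old_style (&v),
	      (unsigned long long) new_style (&v));
      return 0;
    }

Both produce the same low 24 bits; the difference is purely in the
instruction sequence the compiler emits.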