On September 30, 2016 4:43:10 PM GMT+02:00, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> wrote: > >On 30/09/16 15:36, Kyrill Tkachov wrote: >> Hi Richard, >> >> On 29/09/16 11:45, Richard Biener wrote: >>> >>>> + >>>> + /* In some cases get_inner_reference may return a >>>> + MEM_REF [ptr + byteoffset]. For the purposes of this >pass >>>> + canonicalize the base_addr to MEM_REF [ptr] and take >>>> + byteoffset into account in the bitpos. This occurs in >>>> + PR 23684 and this way we can catch more chains. */ >>>> + if (TREE_CODE (base_addr) == MEM_REF >>>> + && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (base_addr, >0))) >>>> + && TREE_CODE (TREE_OPERAND (base_addr, 1)) == >INTEGER_CST >>> This is always an INTEGER_CST. >>> >>>> + && tree_fits_shwi_p (TREE_OPERAND (base_addr, 1)) >>> This will never allow negative offsets (but maybe this is a good >thing?) >>> >>> ) >>>> + { >>>> + bitpos += tree_to_shwi (TREE_OPERAND (base_addr, 1)) >>>> + * BITS_PER_UNIT; >>> this multiplication may overflow. There is mem_ref_offset () which >>> you should really use here, see get_inner_reference itself (and >>> how to translate back from offset_int to HOST_WIDE_INT if it fits). >>> >>>> + >>>> + base_addr = fold_build2 (MEM_REF, TREE_TYPE (base_addr), >>>> + TREE_OPERAND (base_addr, 0), >>>> + build_zero_cst (TREE_TYPE ( >>>> + TREE_OPERAND (base_addr, 1)))); >>> Ugh, building a tree node ... you could use TREE_OPERAND (base_addr, >0) >>> as base_addr instead? >> >> This didn't work for me because aliasing info was lost. >> So in the example: >> void >> foo2 (struct bar *p, struct bar *p2) >> { >> p->b = 0xff; >> p2->b = 0xa; >> p->a = 0xfffff; >> p2->c = 0xc; >> p->c = 0xff; >> p2->d = 0xbf; >> p->d = 0xfff; >> } >> >> we end up merging p->b with p->a even though the p2->b store may >alias. >> We'll record the base objects as being 'p' and 'p2' whereas with my >approach >> we record them as '*p' and '*p2'. 
I don't suppose I could just do: >> TREE_OPERAND (base_addr, 1) = build_zero_cst (TREE_TYPE (TREE_OPERAND (base_addr, 1))); >> ? >> > >Although I think I could try to make it work by using ptr_derefs_may_alias_p in the alias checks a bit more. I'll see what I can do.
I don't think this will be enough. Don't bother with this comment too much, it's a minor issue. Richard. >Kyrill > >> Thanks, >> Kyrill >> >>> >>>> + } >>>> + >>>> + struct imm_store_chain_info **chain_info >>>> + = m_stores.get (base_addr); >>>> + >>>> + if (!invalid) >>>> + { >>>> + store_immediate_info *info; >>>> + if (chain_info) >>>> + { >>>> + info = new store_immediate_info ( >>>> + bitsize, bitpos, rhs, lhs, stmt, >>>> + (*chain_info)->m_store_info.length ()); >>>> + if (dump_file) >>>> + { >>>> + fprintf (dump_file, >>>> + "Recording immediate store from stmt:\n"); >>>> + print_gimple_stmt (dump_file, stmt, 0, 0); >>>> + } >>>> + (*chain_info)->m_store_info.safe_push (info); >>>> + continue; >>>> + } >>>> + >>>> + /* Store aliases any existing chain? */ >>>> + terminate_all_aliasing_chains (lhs, base_addr, stmt); >>>> + >>>> + /* Start a new chain. */ >>>> + struct imm_store_chain_info *new_chain >>>> + = new imm_store_chain_info; >>>> + info = new store_immediate_info (bitsize, bitpos, rhs, >lhs, >>>> + stmt, 0); >>>> + new_chain->m_store_info.safe_push (info); >>>> + m_stores.put (base_addr, new_chain); >>>> + if (dump_file) >>>> + { >>>> + fprintf (dump_file, >>>> + "Starting new chain with statement:\n"); >>>> + print_gimple_stmt (dump_file, stmt, 0, 0); >>>> + fprintf (dump_file, "The base object is:\n"); >>>> + print_generic_expr (dump_file, base_addr, 0); >>>> + fprintf (dump_file, "\n"); >>>> + } >>>> + } >>>> + else >>>> + terminate_all_aliasing_chains (lhs, base_addr, stmt); >>>> + >>>> + continue; >>>> + } >>>> + >>>> + terminate_all_aliasing_chains (NULL_TREE, NULL_TREE, stmt); >>>> + } >>>> + terminate_and_process_all_chains (bb); >>>> + } >>>> + return 0; >>>> +} >>>> + >>>> +} // anon namespace >>>> + >>>> +/* Construct and return a store merging pass object. 
*/ >>>> + >>>> +gimple_opt_pass * >>>> +make_pass_store_merging (gcc::context *ctxt) >>>> +{ >>>> + return new pass_store_merging (ctxt); >>>> +} >>>> diff --git a/gcc/opts.c b/gcc/opts.c >>>> index 45f1f89c..e63d7e4 100644 >>>> --- a/gcc/opts.c >>>> +++ b/gcc/opts.c >>>> @@ -463,6 +463,7 @@ static const struct default_options >default_options_table[] = >>>> { OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 }, >>>> { OPT_LEVELS_1_PLUS, OPT_ftree_ter, NULL, 1 }, >>>> { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, >>>> + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fstore_merging, NULL, 1 }, >>> Please leave it to -O[2s]+ -- the chain invalidation is quadratic >and >>> -O1 should work well even for gigantic basic blocks. >>> >>> Overall the pass looks quite well with the comments addressed. >>> >>> Thanks, >>> Richard. >>> >>>> { OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 }, >>>> { OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 }, >>>> { OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 }, >>>> diff --git a/gcc/params.def b/gcc/params.def >>>> index 8907aa4..e63e594 100644 >>>> --- a/gcc/params.def >>>> +++ b/gcc/params.def >>>> @@ -1100,6 +1100,12 @@ DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS, >>>> "Maximum amount of similar bbs to compare a bb with.", >>>> 10, 0, 0) >>>> +DEFPARAM (PARAM_STORE_MERGING_ALLOW_UNALIGNED, >>>> + "store-merging-allow-unaligned", >>>> + "Allow the store merging pass to introduce unaligned stores >" >>>> + "if it is legal to do so", >>>> + 1, 0, 1) >>>> + >>>> DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS, >>>> "max-tail-merge-iterations", >>>> "Maximum amount of iterations of the pass over a >function.", >>>> diff --git a/gcc/passes.def b/gcc/passes.def >>>> index 2830421..ee7dd50 100644 >>>> --- a/gcc/passes.def >>>> +++ b/gcc/passes.def >>>> @@ -329,6 +329,7 @@ along with GCC; see the file COPYING3. 
If not >see >>>> NEXT_PASS (pass_phiopt); >>>> NEXT_PASS (pass_fold_builtins); >>>> NEXT_PASS (pass_optimize_widening_mul); >>>> + NEXT_PASS (pass_store_merging); >>>> NEXT_PASS (pass_tail_calls); >>>> /* If DCE is not run before checking for uninitialized >uses, >>>> we may get false warnings (e.g., >testsuite/gcc.dg/uninit-5.c). >>>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c >b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c >>>> new file mode 100644 >>>> index 0000000..7c888b4 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c >>>> @@ -0,0 +1,122 @@ >>>> +/* PR middle-end/22141 */ >>>> + >>>> +extern void abort (void); >>>> + >>>> +struct S >>>> +{ >>>> + struct T >>>> + { >>>> + char a; >>>> + char b; >>>> + char c; >>>> + char d; >>>> + } t; >>>> +} u; >>>> + >>>> +struct U >>>> +{ >>>> + struct S s[4]; >>>> +}; >>>> + >>>> +void __attribute__((noinline)) >>>> +c1 (struct T *p) >>>> +{ >>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4) >>>> + abort (); >>>> + __builtin_memset (p, 0xaa, sizeof (*p)); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c2 (struct S *p) >>>> +{ >>>> + c1 (&p->t); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c3 (struct U *p) >>>> +{ >>>> + c2 (&p->s[2]); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f1 (void) >>>> +{ >>>> + u = (struct S) { { 1, 2, 3, 4 } }; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f2 (void) >>>> +{ >>>> + u.t.a = 1; >>>> + u.t.b = 2; >>>> + u.t.c = 3; >>>> + u.t.d = 4; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f3 (void) >>>> +{ >>>> + u.t.d = 4; >>>> + u.t.b = 2; >>>> + u.t.a = 1; >>>> + u.t.c = 3; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f4 (void) >>>> +{ >>>> + struct S v; >>>> + v.t.a = 1; >>>> + v.t.b = 2; >>>> + v.t.c = 3; >>>> + v.t.d = 4; >>>> + c2 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f5 (struct S *p) >>>> +{ >>>> + p->t.a = 1; >>>> + p->t.c = 
3; >>>> + p->t.d = 4; >>>> + p->t.b = 2; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f6 (void) >>>> +{ >>>> + struct U v; >>>> + v.s[2].t.a = 1; >>>> + v.s[2].t.b = 2; >>>> + v.s[2].t.c = 3; >>>> + v.s[2].t.d = 4; >>>> + c3 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f7 (struct U *p) >>>> +{ >>>> + p->s[2].t.a = 1; >>>> + p->s[2].t.c = 3; >>>> + p->s[2].t.d = 4; >>>> + p->s[2].t.b = 2; >>>> +} >>>> + >>>> +int >>>> +main (void) >>>> +{ >>>> + struct U w; >>>> + f1 (); >>>> + c2 (&u); >>>> + f2 (); >>>> + c1 (&u.t); >>>> + f3 (); >>>> + c2 (&u); >>>> + f4 (); >>>> + f5 (&u); >>>> + c2 (&u); >>>> + f6 (); >>>> + f7 (&w); >>>> + c3 (&w); >>>> + return 0; >>>> +} >>>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c >b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c >>>> new file mode 100644 >>>> index 0000000..cb9cc79 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c >>>> @@ -0,0 +1,122 @@ >>>> +/* PR middle-end/22141 */ >>>> + >>>> +extern void abort (void); >>>> + >>>> +struct S >>>> +{ >>>> + struct T >>>> + { >>>> + char a; >>>> + char b; >>>> + char c; >>>> + char d; >>>> + } t; >>>> +} u __attribute__((aligned)); >>>> + >>>> +struct U >>>> +{ >>>> + struct S s[4]; >>>> +}; >>>> + >>>> +void __attribute__((noinline)) >>>> +c1 (struct T *p) >>>> +{ >>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4) >>>> + abort (); >>>> + __builtin_memset (p, 0xaa, sizeof (*p)); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c2 (struct S *p) >>>> +{ >>>> + c1 (&p->t); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c3 (struct U *p) >>>> +{ >>>> + c2 (&p->s[2]); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f1 (void) >>>> +{ >>>> + u = (struct S) { { 1, 2, 3, 4 } }; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f2 (void) >>>> +{ >>>> + u.t.a = 1; >>>> + u.t.b = 2; >>>> + u.t.c = 3; >>>> + u.t.d = 4; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> 
+f3 (void) >>>> +{ >>>> + u.t.d = 4; >>>> + u.t.b = 2; >>>> + u.t.a = 1; >>>> + u.t.c = 3; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f4 (void) >>>> +{ >>>> + struct S v __attribute__((aligned)); >>>> + v.t.a = 1; >>>> + v.t.b = 2; >>>> + v.t.c = 3; >>>> + v.t.d = 4; >>>> + c2 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f5 (struct S *p) >>>> +{ >>>> + p->t.a = 1; >>>> + p->t.c = 3; >>>> + p->t.d = 4; >>>> + p->t.b = 2; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f6 (void) >>>> +{ >>>> + struct U v __attribute__((aligned)); >>>> + v.s[2].t.a = 1; >>>> + v.s[2].t.b = 2; >>>> + v.s[2].t.c = 3; >>>> + v.s[2].t.d = 4; >>>> + c3 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f7 (struct U *p) >>>> +{ >>>> + p->s[2].t.a = 1; >>>> + p->s[2].t.c = 3; >>>> + p->s[2].t.d = 4; >>>> + p->s[2].t.b = 2; >>>> +} >>>> + >>>> +int >>>> +main (void) >>>> +{ >>>> + struct U w __attribute__((aligned)); >>>> + f1 (); >>>> + c2 (&u); >>>> + f2 (); >>>> + c1 (&u.t); >>>> + f3 (); >>>> + c2 (&u); >>>> + f4 (); >>>> + f5 (&u); >>>> + c2 (&u); >>>> + f6 (); >>>> + f7 (&w); >>>> + c3 (&w); >>>> + return 0; >>>> +} >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_1.c >b/gcc/testsuite/gcc.dg/store_merging_1.c >>>> new file mode 100644 >>>> index 0000000..09a4d14 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_1.c >>>> @@ -0,0 +1,35 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +struct bar { >>>> + int a; >>>> + char b; >>>> + char c; >>>> + char d; >>>> + char e; >>>> + char f; >>>> + char g; >>>> +}; >>>> + >>>> +void >>>> +foo1 (struct bar *p) >>>> +{ >>>> + p->b = 0; >>>> + p->a = 0; >>>> + p->c = 0; >>>> + p->d = 0; >>>> + p->e = 0; >>>> +} >>>> + >>>> +void >>>> +foo2 (struct bar *p) >>>> +{ >>>> + p->b = 0; >>>> + p->a = 0; >>>> + p->c = 1; >>>> + p->d = 0; >>>> + p->e = 0; >>>> +} >>>> + 
>>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_2.c >b/gcc/testsuite/gcc.dg/store_merging_2.c >>>> new file mode 100644 >>>> index 0000000..d3acc2d >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_2.c >>>> @@ -0,0 +1,80 @@ >>>> +/* { dg-do run } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +struct bar >>>> +{ >>>> + int a; >>>> + unsigned char b; >>>> + unsigned char c; >>>> + short d; >>>> + unsigned char e; >>>> + unsigned char f; >>>> + unsigned char g; >>>> +}; >>>> + >>>> +__attribute__ ((noinline)) void >>>> +foozero (struct bar *p) >>>> +{ >>>> + p->b = 0; >>>> + p->a = 0; >>>> + p->c = 0; >>>> + p->d = 0; >>>> + p->e = 0; >>>> + p->f = 0; >>>> + p->g = 0; >>>> +} >>>> + >>>> +__attribute__ ((noinline)) void >>>> +foo1 (struct bar *p) >>>> +{ >>>> + p->b = 1; >>>> + p->a = 2; >>>> + p->c = 3; >>>> + p->d = 4; >>>> + p->e = 5; >>>> + p->f = 0; >>>> + p->g = 0xff; >>>> +} >>>> + >>>> +__attribute__ ((noinline)) void >>>> +foo2 (struct bar *p, struct bar *p2) >>>> +{ >>>> + p->b = 0xff; >>>> + p2->b = 0xa; >>>> + p->a = 0xfffff; >>>> + p2->c = 0xc; >>>> + p->c = 0xff; >>>> + p2->d = 0xbf; >>>> + p->d = 0xfff; >>>> +} >>>> + >>>> +int >>>> +main (void) >>>> +{ >>>> + struct bar b1, b2; >>>> + foozero (&b1); >>>> + foozero (&b2); >>>> + >>>> + foo1 (&b1); >>>> + if (b1.b != 1 || b1.a != 2 || b1.c != 3 || b1.d != 4 || b1.e != >5 >>>> + || b1.f != 0 || b1.g != 0xff) >>>> + __builtin_abort (); >>>> + >>>> + foozero (&b1); >>>> + /* Make sure writes to aliasing struct pointers preserve the >>>> + correct order. 
*/ >>>> + foo2 (&b1, &b1); >>>> + if (b1.b != 0xa || b1.a != 0xfffff || b1.c != 0xff || b1.d != >0xfff) >>>> + __builtin_abort (); >>>> + >>>> + foozero (&b1); >>>> + foo2 (&b1, &b2); >>>> + if (b1.a != 0xfffff || b1.b != 0xff || b1.c != 0xff || b1.d != >0xfff >>>> + || b2.b != 0xa || b2.c != 0xc || b2.d != 0xbf) >>>> + __builtin_abort (); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_3.c >b/gcc/testsuite/gcc.dg/store_merging_3.c >>>> new file mode 100644 >>>> index 0000000..cd756c1 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_3.c >>>> @@ -0,0 +1,32 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +/* Make sure stores to volatile addresses don't get combined with >>>> + other accesses. */ >>>> + >>>> +struct bar >>>> +{ >>>> + int a; >>>> + char b; >>>> + char c; >>>> + volatile short d; >>>> + char e; >>>> + char f; >>>> + char g; >>>> +}; >>>> + >>>> +void >>>> +foozero (struct bar *p) >>>> +{ >>>> + p->b = 0xa; >>>> + p->a = 0xb; >>>> + p->c = 0xc; >>>> + p->d = 0; >>>> + p->e = 0xd; >>>> + p->f = 0xe; >>>> + p->g = 0xf; >>>> +} >>>> + >>>> +/* { dg-final { scan-tree-dump "Volatile access terminates all >chains" "store-merging" } } */ >>>> +/* { dg-final { scan-tree-dump-times "=\{v\} 0;" 1 "store-merging" >} } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_4.c >b/gcc/testsuite/gcc.dg/store_merging_4.c >>>> new file mode 100644 >>>> index 0000000..4bf9025 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_4.c >>>> @@ -0,0 +1,32 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +/* Check that we can merge interleaving stores that are guaranteed >>>> + to be 
non-aliasing. */ >>>> + >>>> +struct bar >>>> +{ >>>> + int a; >>>> + char b; >>>> + char c; >>>> + short d; >>>> + char e; >>>> + char f; >>>> + char g; >>>> +}; >>>> + >>>> +void >>>> +foozero (struct bar *restrict p, struct bar *restrict p2) >>>> +{ >>>> + p->b = 0xff; >>>> + p2->b = 0xa; >>>> + p->a = 0xfffff; >>>> + p2->a = 0xab; >>>> + p2->c = 0xc; >>>> + p->c = 0xff; >>>> + p2->d = 0xbf; >>>> + p->d = 0xfff; >>>> +} >>>> + >>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_5.c >b/gcc/testsuite/gcc.dg/store_merging_5.c >>>> new file mode 100644 >>>> index 0000000..3b82420 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_5.c >>>> @@ -0,0 +1,30 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +/* Make sure that non-aliasing non-constant interspersed stores do >not >>>> + stop chains. */ >>>> + >>>> +struct bar { >>>> + int a; >>>> + char b; >>>> + char c; >>>> + char d; >>>> + char e; >>>> + char g; >>>> +}; >>>> + >>>> +void >>>> +foo1 (struct bar *p, char tmp) >>>> +{ >>>> + p->a = 0; >>>> + p->b = 0; >>>> + p->g = tmp; >>>> + p->c = 0; >>>> + p->d = 0; >>>> + p->e = 0; >>>> +} >>>> + >>>> + >>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 1 >"store-merging" } } */ >>>> +/* { dg-final { scan-tree-dump-times "MEM\\\[.*\\\]" 1 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_6.c >b/gcc/testsuite/gcc.dg/store_merging_6.c >>>> new file mode 100644 >>>> index 0000000..7d89baf >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_6.c >>>> @@ -0,0 +1,53 @@ >>>> +/* { dg-do run } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +/* Check that we can widen accesses to bitfields. 
*/ >>>> + >>>> +struct bar { >>>> + int a : 3; >>>> + unsigned char b : 4; >>>> + unsigned char c : 1; >>>> + char d; >>>> + char e; >>>> + char f; >>>> + char g; >>>> +}; >>>> + >>>> +__attribute__ ((noinline)) void >>>> +foozero (struct bar *p) >>>> +{ >>>> + p->b = 0; >>>> + p->a = 0; >>>> + p->c = 0; >>>> + p->d = 0; >>>> + p->e = 0; >>>> + p->f = 0; >>>> + p->g = 0; >>>> +} >>>> + >>>> +__attribute__ ((noinline)) void >>>> +foo1 (struct bar *p) >>>> +{ >>>> + p->b = 3; >>>> + p->a = 2; >>>> + p->c = 1; >>>> + p->d = 4; >>>> + p->e = 5; >>>> +} >>>> + >>>> +int >>>> +main (void) >>>> +{ >>>> + struct bar p; >>>> + foozero (&p); >>>> + foo1 (&p); >>>> + if (p.a != 2 || p.b != 3 || p.c != 1 || p.d != 4 || p.e != 5 >>>> + || p.f != 0 || p.g != 0) >>>> + __builtin_abort (); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> + >>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_7.c >b/gcc/testsuite/gcc.dg/store_merging_7.c >>>> new file mode 100644 >>>> index 0000000..02008f7 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.dg/store_merging_7.c >>>> @@ -0,0 +1,26 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target non_strict_align } */ >>>> +/* { dg-options "-O -fdump-tree-store-merging" } */ >>>> + >>>> +/* Check that we can merge consecutive array members through the >pointer. >>>> + PR rtl-optimization/23684. 
*/ >>>> + >>>> +void >>>> +foo (char *input) >>>> +{ >>>> + input = __builtin_assume_aligned (input, 8); >>>> + input[0] = 'H'; >>>> + input[1] = 'e'; >>>> + input[2] = 'l'; >>>> + input[3] = 'l'; >>>> + input[4] = 'o'; >>>> + input[5] = ' '; >>>> + input[6] = 'w'; >>>> + input[7] = 'o'; >>>> + input[8] = 'r'; >>>> + input[9] = 'l'; >>>> + input[10] = 'd'; >>>> + input[11] = '\0'; >>>> +} >>>> + >>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 1 >"store-merging" } } */ >>>> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c >b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c >>>> index f02e55f..9de4e77 100644 >>>> --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c >>>> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c >>>> @@ -3,22 +3,22 @@ >>>> int arr[4][4]; >>>> void >>>> -foo () >>>> +foo (int x, int y) >>>> { >>>> - arr[0][1] = 1; >>>> - arr[1][0] = -1; >>>> - arr[2][0] = 1; >>>> - arr[1][1] = -1; >>>> - arr[0][2] = 1; >>>> - arr[0][3] = -1; >>>> - arr[1][2] = 1; >>>> - arr[2][1] = -1; >>>> - arr[3][0] = 1; >>>> - arr[3][1] = -1; >>>> - arr[2][2] = 1; >>>> - arr[1][3] = -1; >>>> - arr[2][3] = 1; >>>> - arr[3][2] = -1; >>>> + arr[0][1] = x; >>>> + arr[1][0] = y; >>>> + arr[2][0] = x; >>>> + arr[1][1] = y; >>>> + arr[0][2] = x; >>>> + arr[0][3] = y; >>>> + arr[1][2] = x; >>>> + arr[2][1] = y; >>>> + arr[3][0] = x; >>>> + arr[3][1] = y; >>>> + arr[2][2] = x; >>>> + arr[1][3] = y; >>>> + arr[2][3] = x; >>>> + arr[3][2] = y; >>>> } >>>> /* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" >7 } } */ >>>> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c >b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c >>>> index 40056b1..824f0d2 100644 >>>> --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c >>>> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c >>>> @@ -3,22 +3,22 @@ >>>> float arr[4][4]; >>>> void >>>> -foo () >>>> +foo (float x, float y) >>>> { >>>> - arr[0][1] = 1; >>>> - arr[1][0] = -1; >>>> - arr[2][0] = 1; >>>> - arr[1][1] 
= -1; >>>> - arr[0][2] = 1; >>>> - arr[0][3] = -1; >>>> - arr[1][2] = 1; >>>> - arr[2][1] = -1; >>>> - arr[3][0] = 1; >>>> - arr[3][1] = -1; >>>> - arr[2][2] = 1; >>>> - arr[1][3] = -1; >>>> - arr[2][3] = 1; >>>> - arr[3][2] = -1; >>>> + arr[0][1] = x; >>>> + arr[1][0] = y; >>>> + arr[2][0] = x; >>>> + arr[1][1] = y; >>>> + arr[0][2] = x; >>>> + arr[0][3] = y; >>>> + arr[1][2] = x; >>>> + arr[2][1] = y; >>>> + arr[3][0] = x; >>>> + arr[3][1] = y; >>>> + arr[2][2] = x; >>>> + arr[1][3] = y; >>>> + arr[2][3] = x; >>>> + arr[3][2] = y; >>>> } >>>> /* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" >7 } } */ >>>> diff --git a/gcc/testsuite/gcc.target/i386/pr22141.c >b/gcc/testsuite/gcc.target/i386/pr22141.c >>>> new file mode 100644 >>>> index 0000000..036422e >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/i386/pr22141.c >>>> @@ -0,0 +1,126 @@ >>>> +/* PR middle-end/22141 */ >>>> +/* { dg-do compile } */ >>>> +/* { dg-options "-Os" } */ >>>> + >>>> +extern void abort (void); >>>> + >>>> +struct S >>>> +{ >>>> + struct T >>>> + { >>>> + char a; >>>> + char b; >>>> + char c; >>>> + char d; >>>> + } t; >>>> +} u; >>>> + >>>> +struct U >>>> +{ >>>> + struct S s[4]; >>>> +}; >>>> + >>>> +void __attribute__((noinline)) >>>> +c1 (struct T *p) >>>> +{ >>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4) >>>> + abort (); >>>> + __builtin_memset (p, 0xaa, sizeof (*p)); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c2 (struct S *p) >>>> +{ >>>> + c1 (&p->t); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +c3 (struct U *p) >>>> +{ >>>> + c2 (&p->s[2]); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f1 (void) >>>> +{ >>>> + u = (struct S) { { 1, 2, 3, 4 } }; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f2 (void) >>>> +{ >>>> + u.t.a = 1; >>>> + u.t.b = 2; >>>> + u.t.c = 3; >>>> + u.t.d = 4; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f3 (void) >>>> +{ >>>> + u.t.d = 4; >>>> + u.t.b = 2; >>>> + 
u.t.a = 1; >>>> + u.t.c = 3; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f4 (void) >>>> +{ >>>> + struct S v; >>>> + v.t.a = 1; >>>> + v.t.b = 2; >>>> + v.t.c = 3; >>>> + v.t.d = 4; >>>> + c2 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f5 (struct S *p) >>>> +{ >>>> + p->t.a = 1; >>>> + p->t.c = 3; >>>> + p->t.d = 4; >>>> + p->t.b = 2; >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f6 (void) >>>> +{ >>>> + struct U v; >>>> + v.s[2].t.a = 1; >>>> + v.s[2].t.b = 2; >>>> + v.s[2].t.c = 3; >>>> + v.s[2].t.d = 4; >>>> + c3 (&v); >>>> +} >>>> + >>>> +void __attribute__((noinline)) >>>> +f7 (struct U *p) >>>> +{ >>>> + p->s[2].t.a = 1; >>>> + p->s[2].t.c = 3; >>>> + p->s[2].t.d = 4; >>>> + p->s[2].t.b = 2; >>>> +} >>>> + >>>> +int >>>> +main (void) >>>> +{ >>>> + struct U w; >>>> + f1 (); >>>> + c2 (&u); >>>> + f2 (); >>>> + c1 (&u.t); >>>> + f3 (); >>>> + c2 (&u); >>>> + f4 (); >>>> + f5 (&u); >>>> + c2 (&u); >>>> + f6 (); >>>> + f7 (&w); >>>> + c3 (&w); >>>> + return 0; >>>> +} >>>> + >>>> +/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */ >>>> diff --git a/gcc/testsuite/gcc.target/i386/pr34012.c >b/gcc/testsuite/gcc.target/i386/pr34012.c >>>> index 00b1240..d0cffa0 100644 >>>> --- a/gcc/testsuite/gcc.target/i386/pr34012.c >>>> +++ b/gcc/testsuite/gcc.target/i386/pr34012.c >>>> @@ -1,7 +1,7 @@ >>>> /* PR rtl-optimization/34012 */ >>>> /* { dg-do compile } */ >>>> /* { dg-require-effective-target lp64 } */ >>>> -/* { dg-options "-O2" } */ >>>> +/* { dg-options "-O2 -fno-store-merging" } */ >>>> void bar (long int *); >>>> void >>>> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h >>>> index a706729..b5373a3 100644 >>>> --- a/gcc/tree-pass.h >>>> +++ b/gcc/tree-pass.h >>>> @@ -425,6 +425,7 @@ extern gimple_opt_pass >*make_pass_late_warn_uninitialized (gcc::context *ctxt); >>>> extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context >*ctxt); >>>> extern gimple_opt_pass *make_pass_cse_sincos (gcc::context 
>*ctxt); >>>> extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context >*ctxt); >>>> +extern gimple_opt_pass *make_pass_store_merging (gcc::context >*ctxt); >>>> extern gimple_opt_pass *make_pass_optimize_widening_mul >(gcc::context *ctxt); >>>> extern gimple_opt_pass *make_pass_warn_function_return >(gcc::context *ctxt); >>>> extern gimple_opt_pass *make_pass_warn_function_noreturn >(gcc::context *ctxt); >>