This fixes PR51528 by hiding the issue that SRA generates copy in/out with a type not suitable for preserving the data (any non-mode-precision thing). It fixes it by instead of emitting
x$i_8 = x.i; D.1720 = x; D.1720.i = x$i_8; for an assign to an unscalarized D.1720 from a partly scalarized x (so the aggregate assignment prevails) emit x.i = x$i_8; D.1720 = x; instead. This allows the unfortunate copy in/out to be optimized away (and thus its undefined behavior). Basically whenever the aggregate copy will prevail prefer to restore the RHS and reload the LHS components - just like we do for aggregate uses/destinations of call statements. I think we can still clean up the aggregate copy path a lot, but that's for 4.8 (unless new bugs pop up). Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Richard. 2012-01-27 Richard Guenther <rguent...@suse.de> PR tree-optimization/51528 * tree-sra.c (sra_modify_assign): Handle the default fallback for aggregate assigns better. * gcc.dg/torture/pr51528.c: New testcase. Index: gcc/tree-sra.c =================================================================== *** gcc/tree-sra.c (revision 183616) --- gcc/tree-sra.c (working copy) *************** sra_modify_assign (gimple *stmt, gimple_ *** 3060,3104 **** } else { ! if (racc) { ! if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data) { ! if (dump_file) ! { ! fprintf (dump_file, "Removing load: "); ! print_gimple_stmt (dump_file, *stmt, 0, 0); ! } ! ! if (TREE_CODE (lhs) == SSA_NAME) ! { ! rhs = get_repl_default_def_ssa_name (racc); ! if (!useless_type_conversion_p (TREE_TYPE (lhs), ! TREE_TYPE (rhs))) ! rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR, ! TREE_TYPE (lhs), rhs); ! } ! else ! { ! if (racc->first_child) ! generate_subtree_copies (racc->first_child, lhs, ! racc->offset, 0, 0, gsi, ! false, false, loc); ! ! gcc_assert (*stmt == gsi_stmt (*gsi)); ! unlink_stmt_vdef (*stmt); ! gsi_remove (gsi, true); ! sra_stats.deleted++; ! return SRA_AM_REMOVED; ! } } - else if (racc->first_child) - generate_subtree_copies (racc->first_child, lhs, racc->offset, - 0, 0, gsi, false, true, loc); } if (access_has_children_p (lacc)) ! generate_subtree_copies (lacc->first_child, rhs, lacc->offset, ! 0, 0, gsi, true, true, loc); } } --- 3061,3124 ---- } else { ! if (racc ! && !racc->grp_to_be_replaced && !racc->grp_unscalarized_data) { ! if (dump_file) ! { ! fprintf (dump_file, "Removing load: "); ! print_gimple_stmt (dump_file, *stmt, 0, 0); ! } ! ! if (TREE_CODE (lhs) == SSA_NAME) ! { ! rhs = get_repl_default_def_ssa_name (racc); ! if (!useless_type_conversion_p (TREE_TYPE (lhs), ! TREE_TYPE (rhs))) ! rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR, ! TREE_TYPE (lhs), rhs); ! } ! else { ! if (racc->first_child) ! generate_subtree_copies (racc->first_child, lhs, ! racc->offset, 0, 0, gsi, ! false, false, loc); ! ! gcc_assert (*stmt == gsi_stmt (*gsi)); ! unlink_stmt_vdef (*stmt); ! gsi_remove (gsi, true); ! sra_stats.deleted++; ! return SRA_AM_REMOVED; } } + + /* If there is no unscalarized data on the RHS replace + the aggregate copy by piecewise storing into the LHS. */ + if (access_has_children_p (racc) && racc->grp_covered) + { + generate_subtree_copies (racc->first_child, lhs, racc->offset, + 0, 0, gsi, false, false, loc); + gcc_assert (!access_has_children_p (lacc)); + gcc_assert (*stmt == gsi_stmt (*gsi)); + unlink_stmt_vdef (*stmt); + gsi_remove (gsi, true); + sra_stats.deleted++; + return SRA_AM_REMOVED; + } + + /* If there is unscalarized data on the RHS the aggregate + copy will prevail, so simply update the RHS from its + scalarized components. */ + if (access_has_children_p (racc)) + generate_subtree_copies (racc->first_child, racc->base, + 0, 0, 0, gsi, false, false, loc); + + /* The aggregate copy has prevailed. Re-load the LHS components + from the LHS. */ if (access_has_children_p (lacc)) ! generate_subtree_copies (lacc->first_child, lhs, ! 0, 0, 0, gsi, true, true, loc); } } Index: gcc/testsuite/gcc.dg/torture/pr51528.c =================================================================== *** gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0) --- gcc/testsuite/gcc.dg/torture/pr51528.c (revision 0) *************** *** 0 **** --- 1,46 ---- + /* { dg-do run } */ + /* { dg-options "-fno-early-inlining" } */ + + extern void abort (void); + + union U + { + int i; + _Bool b; + }; + + _Bool gb; + + void __attribute__ ((noinline)) + use_bool (union U u) + { + gb = u.b; + } + + union U + bar (void) + { + union U u; + u.i = 0xFFFE; + return u; + } + + union U __attribute__ ((noinline)) + foo (void) + { + union U u,v; + + u.b = 1; + use_bool (u); + u = bar (); + + return u; + } + + int main (int argc, char **argv) + { + union U u = foo (); + if (u.i != 0xFFFE) + abort (); + return 0; + }