This fixes PR51528 by hiding the issue that SRA generates
copy in/out with a type not suitable for preserving the data
(any non-mode-precision thing).  It fixes it by instead of
emitting

  x$i_8 = x.i;
  D.1720 = x;
  D.1720.i = x$i_8;

for an assign to an unscalarized D.1720 from a partly scalarized x
(so the aggregate assignment prevails) emit

  x.i = x$i_8;
  D.1720 = x;

instead.  This allows the unfortunate copy in/out to be optimized
away (and thus its undefined behavior).

Basically whenever the aggregate copy will prevail prefer to
restore the RHS and reload the LHS components - just like we
do for aggregate uses/destinations of call statements.

I think we can still clean up the aggregate copy path a lot,
but that's for 4.8 (unless new bugs pop up).

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2012-01-27  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/51528
        * tree-sra.c (sra_modify_assign): Handle the default fallback
        for aggregate assigns better.

        * gcc.dg/torture/pr51528.c: New testcase.

Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c      (revision 183616)
--- gcc/tree-sra.c      (working copy)
*************** sra_modify_assign (gimple *stmt, gimple_
*** 3060,3104 ****
        }
        else
        {
!         if (racc)
            {
!             if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
                {
!                 if (dump_file)
!                   {
!                     fprintf (dump_file, "Removing load: ");
!                     print_gimple_stmt (dump_file, *stmt, 0, 0);
!                   }
! 
!                 if (TREE_CODE (lhs) == SSA_NAME)
!                   {
!                     rhs = get_repl_default_def_ssa_name (racc);
!                     if (!useless_type_conversion_p (TREE_TYPE (lhs),
!                                                     TREE_TYPE (rhs)))
!                       rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
!                                              TREE_TYPE (lhs), rhs);
!                   }
!                 else
!                   {
!                     if (racc->first_child)
!                       generate_subtree_copies (racc->first_child, lhs,
!                                                racc->offset, 0, 0, gsi,
!                                                false, false, loc);
! 
!                     gcc_assert (*stmt == gsi_stmt (*gsi));
!                     unlink_stmt_vdef (*stmt);
!                     gsi_remove (gsi, true);
!                     sra_stats.deleted++;
!                     return SRA_AM_REMOVED;
!                   }
                }
-             else if (racc->first_child)
-               generate_subtree_copies (racc->first_child, lhs, racc->offset,
-                                        0, 0, gsi, false, true, loc);
            }
          if (access_has_children_p (lacc))
!           generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
!                                    0, 0, gsi, true, true, loc);
        }
      }
  
--- 3061,3124 ----
        }
        else
        {
!         if (racc
!             && !racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
            {
!             if (dump_file)
!               {
!                 fprintf (dump_file, "Removing load: ");
!                 print_gimple_stmt (dump_file, *stmt, 0, 0);
!               }
! 
!             if (TREE_CODE (lhs) == SSA_NAME)
!               {
!                 rhs = get_repl_default_def_ssa_name (racc);
!                 if (!useless_type_conversion_p (TREE_TYPE (lhs),
!                                                 TREE_TYPE (rhs)))
!                   rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
!                                          TREE_TYPE (lhs), rhs);
!               }
!             else
                {
!                 if (racc->first_child)
!                   generate_subtree_copies (racc->first_child, lhs,
!                                            racc->offset, 0, 0, gsi,
!                                            false, false, loc);
! 
!                 gcc_assert (*stmt == gsi_stmt (*gsi));
!                 unlink_stmt_vdef (*stmt);
!                 gsi_remove (gsi, true);
!                 sra_stats.deleted++;
!                 return SRA_AM_REMOVED;
                }
            }
+ 
+         /* If there is no unscalarized data on the RHS replace
+            the aggregate copy by piecewise storing into the LHS.  */
+         if (access_has_children_p (racc) && racc->grp_covered)
+           {
+             generate_subtree_copies (racc->first_child, lhs, racc->offset,
+                                      0, 0, gsi, false, false, loc);
+             gcc_assert (!access_has_children_p (lacc));
+             gcc_assert (*stmt == gsi_stmt (*gsi));
+             unlink_stmt_vdef (*stmt);
+             gsi_remove (gsi, true);
+             sra_stats.deleted++;
+             return SRA_AM_REMOVED;
+           }
+ 
+         /* If there is unscalarized data on the RHS the aggregate
+            copy will prevail, so simply update the RHS from its
+            scalarized components.  */
+         if (access_has_children_p (racc))
+           generate_subtree_copies (racc->first_child, racc->base,
+                                    0, 0, 0, gsi, false, false, loc);
+ 
+         /* The aggregate copy has prevailed.  Re-load the LHS components
+            from the LHS.  */
          if (access_has_children_p (lacc))
!           generate_subtree_copies (lacc->first_child, lhs,
!                                    0, 0, 0, gsi, true, true, loc);
        }
      }
  
Index: gcc/testsuite/gcc.dg/torture/pr51528.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr51528.c      (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr51528.c      (revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-do run } */
+ /* { dg-options "-fno-early-inlining" } */
+ 
+ extern void abort (void);
+ 
+ union U
+ {
+   int i;
+   _Bool b;
+ };
+ 
+ _Bool gb;
+ 
+ void  __attribute__ ((noinline))
+ use_bool (union U u)
+ {
+   gb = u.b;
+ }
+ 
+ union U
+ bar (void)
+ {
+   union U u;
+   u.i = 0xFFFE;
+   return u;
+ }
+ 
+ union U  __attribute__ ((noinline))
+ foo (void)
+ {
+   union U u,v;
+ 
+   u.b = 1;
+   use_bool (u);
+   u = bar ();
+ 
+   return u;
+ }
+ 
+ int main (int argc, char **argv)
+ {
+   union U u = foo ();
+   if (u.i != 0xFFFE)
+     abort ();
+   return 0;
+ }

Reply via email to