Hi, When SRA tries to modify an assignment where on one side it should put a new scalar replacement but the other is actually an aggregate with a number of replacements for it, it will generate MEM-REFs into the former replacement which can lead to miscompilations.
This is avoided by the simple patch below. With it, we deal with these situations like with other type-casts that SRA cannot handle: we channel the data through the original variable and the original statement. The testcase is not miscompiled with 4.6 gcc but the bug is just latent there. I have verified the problem goes away on i686-linux. I have bootstrapped and tested the patch on x86_64-linux too. I intend to do a full i686 bootstrap and test but so far have not managed to do it. OK for trunk and 4.6 after it is unfrozen? Thanks, Martin 2011-06-22 Martin Jambor <mjam...@suse.cz> PR tree-optimizations/49516 * tree-sra.c (sra_modify_assign): Choose the safe path for aggregate copies if we also did scalar replacements. * testsuite/g++.dg/tree-ssa/pr49516.C: New test. Index: src/gcc/tree-sra.c =================================================================== --- src.orig/gcc/tree-sra.c +++ src/gcc/tree-sra.c @@ -2804,7 +2804,8 @@ sra_modify_assign (gimple *stmt, gimple_ there to do the copying and then load the scalar replacements of the LHS. This is what the first branch does. */ - if (gimple_has_volatile_ops (*stmt) + if (modify_this_stmt + || gimple_has_volatile_ops (*stmt) || contains_vce_or_bfcref_p (rhs) || contains_vce_or_bfcref_p (lhs)) { Index: src/gcc/testsuite/g++.dg/tree-ssa/pr49516.C =================================================================== --- /dev/null +++ src/gcc/testsuite/g++.dg/tree-ssa/pr49516.C @@ -0,0 +1,86 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +extern "C" void abort (void); + +typedef int int32; +typedef unsigned int uint32; +typedef unsigned long long uint64; +typedef short int16; + +class Tp { + public: + Tp(int, const int segment, const int index) __attribute__((noinline)); + + inline bool operator==(const Tp& other) const; + inline bool operator!=(const Tp& other) const; + int GetType() const { return type_; } + int GetSegment() const { return segment_; } + int GetIndex() const { return index_; } + private: + inline static bool IsValidSegment(const int segment); + static const int kSegmentBits = 28; + static const int kTypeBits = 4; + static const int kMaxSegment = (1 << kSegmentBits) - 1; + + union { + + struct { + int32 index_; + uint32 segment_ : kSegmentBits; + uint32 type_ : kTypeBits; + }; + struct { + int32 dummy_; + uint32 type_and_segment_; + }; + uint64 value_; + }; +}; + +Tp::Tp(int t, const int segment, const int index) + : index_(index), segment_(segment), type_(t) {} + +inline bool Tp::operator==(const Tp& other) const { + return value_ == other.value_; +} +inline bool Tp::operator!=(const Tp& other) const { + return value_ != other.value_; +} + +class Range { + public: + inline Range(const Tp& position, const int count) __attribute__((always_inline)); + inline Tp GetBeginTokenPosition() const; + inline Tp GetEndTokenPosition() const; + private: + Tp position_; + int count_; + int16 begin_index_; + int16 end_index_; +}; + +inline Range::Range(const Tp& position, + const int count) + : position_(position), count_(count), begin_index_(0), end_index_(0) + { } + +inline Tp Range::GetBeginTokenPosition() const { + return position_; +} +inline Tp Range::GetEndTokenPosition() const { + return Tp(position_.GetType(), position_.GetSegment(), + position_.GetIndex() + count_); +} + +int main () +{ + Range range(Tp(0, 0, 3), 0); + if (!(range.GetBeginTokenPosition() == Tp(0, 0, 3))) + abort (); + + if (!(range.GetEndTokenPosition() == Tp(0, 0, 3))) + abort(); + + return 0; +}