================
@@ -439,82 +444,247 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
       Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
                                    MemberInfo::Field, nullptr, *Field));
     }
-    return;
+    return Field;
   }
 
-  // Check if OffsetInRecord (the size in bits of the current run) is better
-  // as a single field run. When OffsetInRecord has legal integer width, and
-  // its bitfield offset is naturally aligned, it is better to make the
-  // bitfield a separate storage component so as it can be accessed directly
-  // with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
-                                      uint64_t StartBitOffset) {
-    if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
-      return false;
-    if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) ||
-        !DataLayout.fitsInLegalInteger(OffsetInRecord))
-      return false;
-    // Make sure StartBitOffset is naturally aligned if it is treated as an
-    // IType integer.
-    if (StartBitOffset %
-            Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
-        0)
-      return false;
-    return true;
-  };
+  // The SysV ABI can overlap bitfield storage units with both other bitfield
+  // storage units /and/ other non-bitfield data members. Accessing a sequence
+  // of bitfields mustn't interfere with adjacent non-bitfields -- they're
+  // permitted to be accessed in separate threads for instance.
+
+  // We split runs of bit-fields into a sequence of "access units". When we emit
+  // a load or store of a bit-field, we'll load/store the entire containing
+  // access unit. As mentioned, the standard requires that these loads and
+  // stores must not interfere with accesses to other memory locations, and it
+  // defines the bit-field's memory location as the current run of
+  // non-zero-width bit-fields. So an access unit must never overlap with
+  // non-bit-field storage or cross a zero-width bit-field. Otherwise, we're
+  // free to draw the lines as we see fit.
+
+  // Drawing these lines well can be complicated. LLVM generally can't modify a
+  // program to access memory that it didn't before, so using very narrow access
+  // units can prevent the compiler from using optimal access patterns. For
+  // example, suppose a run of bit-fields occupies four bytes in a struct. If we
+  // split that into four 1-byte access units, then a sequence of assignments
+  // that doesn't touch all four bytes may have to be emitted with multiple
+  // 8-bit stores instead of a single 32-bit store. On the other hand, if we use
+  // very wide access units, we may find ourselves emitting accesses to
+  // bit-fields we didn't really need to touch, just because LLVM was unable to
+  // clean up after us.
+
+  // It is desirable to have access units be aligned powers of 2 no larger than
+  // a register. (On non-strict alignment ISAs, the alignment requirement can be
+  // dropped.) A three byte access unit will be accessed using 2-byte and 1-byte
+  // accesses and bit manipulation. If no bitfield straddles across the two
+  // separate accesses, it is better to have separate 2-byte and 1-byte access
+  // units, as then LLVM will not generate unnecessary memory accesses, or bit
+  // manipulation. Similarly, on a strict-alignment architecture, it is better
+  // to keep access-units naturally aligned, to avoid similar bit
+  // manipulation synthesizing larger unaligned accesses.
+
+  // Bitfields that share parts of a single byte are, of necessity, placed in
+  // the same access unit.
+  // That unit will encompass a consecutive run where adjacent bitfields share
+  // parts of a byte. (The first bitfield of such an access unit will start at
+  // the beginning of a byte.)
+
+  // We then try and accumulate adjacent access units when the combined unit is
+  // naturally sized, no larger than a register, and (on a strict alignment
+  // ISA), naturally aligned. Note that this requires lookahead to one or more
+  // subsequent access units. For instance, consider a 2-byte access-unit
+  // followed by 2 1-byte units. We can merge that into a 4-byte access-unit,
+  // but we would not want to merge a 2-byte followed by a single 1-byte (and no
+  // available tail padding). We keep track of the best access unit seen so far,
+  // and use that when we determine we cannot accumulate any more. Then we start
+  // again at the bitfield following that best one.
+
+  // The accumulation is also prevented when:
+  // *) it would cross a character-aligned zero-width bitfield, or
+  // *) the fine-grained bitfield access option is in effect.
+
+  CharUnits RegSize =
+      bitsToCharUnits(Context.getTargetInfo().getRegisterWidth());
+  unsigned CharBits = Context.getCharWidth();
+
+  // Data about the start of the span we're accumulating to create an access
+  // unit from. Begin is the first bitfield of the span. If Begin is FieldEnd,
+  // we've not got a current span. The span starts at the BeginOffset character
+  // boundary. BitSizeSinceBegin is the size (in bits) of the span -- this might
+  // include padding when we've advanced to a subsequent bitfield run.
+  RecordDecl::field_iterator Begin = FieldEnd;
+  CharUnits BeginOffset;
+  uint64_t BitSizeSinceBegin;
+
+  // The (non-inclusive) end of the largest acceptable access unit we've found
+  // since Begin. If this is Begin, we're gathering the initial set of bitfields
+  // of a new span. BestEndOffset is the end of that acceptable access unit --
+  // it might extend beyond the last character of the bitfield run, using
+  // available padding characters.
+  RecordDecl::field_iterator BestEnd = Begin;
+  CharUnits BestEndOffset;
 
-  // The start field is better as a single field run.
-  bool StartFieldAsSingleRun = false;
   for (;;) {
-    // Check to see if we need to start a new run.
-    if (Run == FieldEnd) {
-      // If we're out of fields, return.
-      if (Field == FieldEnd)
+    // AtAlignedBoundary is true iff Field is the (potential) start of a new
+    // span (or the end of the bitfields). When true, LimitOffset is the
+    // character offset of that span and Barrier indicates whether that new
+    // span cannot be merged into the current one.
+    bool AtAlignedBoundary = false;
+    bool Barrier = false;
+
+    if (Field != FieldEnd && Field->isBitField()) {
+      uint64_t BitOffset = getFieldBitOffset(*Field);
+      if (Begin == FieldEnd) {
+        // Beginning a new span.
+        Begin = Field;
+        BestEnd = Begin;
+
+        assert((BitOffset % CharBits) == 0 && "Not at start of char");
+        BeginOffset = bitsToCharUnits(BitOffset);
+        BitSizeSinceBegin = 0;
+      } else if ((BitOffset % CharBits) != 0) {
+        // Bitfield occupies the same character as the previous bitfield, so it
+        // must be part of the same span. This can include zero-length
+        // bitfields, should the target not align them to character boundaries.
+        // Such non-alignment is at variance with the C++ std that requires
+        // zero-length bitfields be a barrier between access units. But of
+        // course we can't achieve that in the middle of a character.
+        assert(BitOffset == Context.toBits(BeginOffset) + BitSizeSinceBegin &&
+               "Concatenating non-contiguous bitfields");
+      } else {
+        // Bitfield potentially begins a new span. This includes zero-length
+        // bitfields on non-aligning targets that lie at character boundaries
+        // (those are barriers to merging).
+        if (Field->isZeroLengthBitField(Context))
+          Barrier = true;
+        AtAlignedBoundary = true;
+      }
+    } else {
+      // We've reached the end of the bitfield run. Either we're done, or this
+      // is a barrier for the current span.
+      if (Begin == FieldEnd)
         break;
-      // Any non-zero-length bitfield can start a new run.
-      if (!Field->isZeroLengthBitField(Context)) {
-        Run = Field;
-        StartBitOffset = getFieldBitOffset(*Field);
-        Tail = StartBitOffset + Field->getBitWidthValue(Context);
-        StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
-                                                         StartBitOffset);
+
+      Barrier = true;
+      AtAlignedBoundary = true;
+    }
+
+    // InstallBest indicates whether we should create an access unit for the
+    // current best span: fields [Begin, BestEnd) occupying characters
+    // [BeginOffset, BestEndOffset).
+    bool InstallBest = false;
+    if (AtAlignedBoundary) {
+      // Field is the start of a new span or the end of the bitfields. The
+      // just-seen span now extends to BitSizeSinceBegin.
+
+      // Determine if we can accumulate that just-seen span into the current
+      // accumulation.
+      CharUnits AccessSize = bitsToCharUnits(BitSizeSinceBegin + CharBits - 1);
+      if (BestEnd == Begin) {
+        // This is the initial run at the start of a new span. By definition,
+        // this is the best seen so far.
+        BestEnd = Field;
+        BestEndOffset = BeginOffset + AccessSize;
+        if (Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
+          // Fine-grained access, so no merging of spans.
+          InstallBest = true;
+        else if (!BitSizeSinceBegin)
+          // A zero-sized initial span -- this will install nothing and reset
+          // for another.
+          InstallBest = true;
+      } else if (AccessSize > RegSize)
+        // Accumulating the just-seen span would create a multi-register access
+        // unit, which would increase register pressure.
+        InstallBest = true;
+
+      if (!InstallBest) {
+        // Determine if accumulating the just-seen span will create an expensive
+        // access-unit or not.
+        llvm::Type *Type = getIntNType(Context.toBits(AccessSize));
+        if (!Context.getTargetInfo().hasCheapUnalignedBitFieldAccess()) {
+          // Unaligned accesses are expensive. Only accumulate if the new unit
+          // is naturally aligned. Otherwise install the best we have, which is
+          // either the initial access unit (can't do better), or a naturally
+          // aligned subsequent accumulation.
+          CharUnits Align = getAlignment(Type);
+          if (Align > Layout.getAlignment())
+            // The alignment required is greater than the containing structure
+            // itself.
+            InstallBest = true;
+          else if (!BeginOffset.isMultipleOf(Align))
+            // The access unit is not at a naturally aligned offset within the
+            // structure.
+            InstallBest = true;
+        }
+
+        if (!InstallBest) {
+          // Find the next used storage offset to determine what the limit of
+          // the current span is. That's either the offset of the next field
+          // with storage (which might be Field itself) or the end of the
+          // non-reusable tail padding.
+          CharUnits LimitOffset;
+          for (auto Probe = Field; Probe != FieldEnd; ++Probe)
+            if (!Probe->isZeroSize(Context)) {
+              // A member with storage sets the limit.
+              assert((getFieldBitOffset(*Probe) % CharBits) == 0 &&
+                     "Next storage is not byte-aligned");
+              LimitOffset = bitsToCharUnits(getFieldBitOffset(*Probe));
+              goto FoundLimit;
+            }
+          // We reached the end of the fields. We can't necessarily use tail
+          // padding in C++ structs, so the NonVirtual size is what we must
+          // use there.
+          LimitOffset = RD ? Layout.getNonVirtualSize() : Layout.getDataSize();
+        FoundLimit:;
+
+          CharUnits TypeSize = getSize(Type);
+          if (BeginOffset + TypeSize <= LimitOffset) {
+            // There is space before LimitOffset to create a naturally-sized
+            // access unit.
+            BestEndOffset = BeginOffset + TypeSize;
+            BestEnd = Field;
+          }
+
+          if (Barrier)
+            // The next field is a barrier that we cannot merge across.
+            InstallBest = true;
+          else
+            // LimitOffset is the offset of the (aligned) next bitfield in this
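[Editorial aside, not part of the patch: a minimal illustration of the span-merging strategy the new comments describe, using two hypothetical structs. It assumes a typical target with 32-bit or wider registers, 4-byte alignment for these structs, and either cheap unaligned access or the naturally aligned offsets shown.]

```c++
// Each bitfield below starts its own byte-aligned span.
struct Merged {
  unsigned a : 16; // bytes 0-1: a 2-byte span
  unsigned b : 8;  // byte 2: a 1-byte span
  unsigned c : 8;  // byte 3: a 1-byte span
  // Nothing else occupies bytes 0-3, so the three spans can be accumulated
  // into one naturally aligned 4-byte access unit.
};

struct NotMerged {
  unsigned a : 16; // bytes 0-1: a 2-byte span
  unsigned b : 8;  // byte 2: a 1-byte span
  char x;          // byte 3: non-bitfield storage
  // An access unit must not overlap x, so there is no room to widen to a
  // 4-byte unit: a stays a 2-byte access unit and b a 1-byte access unit.
};
```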
----------------

rjmccall wrote:

```suggestion
            // Otherwise, we know we're not installing. Update the bit size of
            // the current span to go all the way to LimitOffset.
            // LimitOffset is the offset of the (aligned) next bitfield in this
```

https://github.com/llvm/llvm-project/pull/65742
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits