Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/117...@github.com>
llvmbot wrote: <!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Timm Baeder (tbaederr) <details> <summary>Changes</summary> --- Patch is 46.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117179.diff 10 Files Affected: - (added) clang/lib/AST/ByteCode/BitcastBuffer.cpp (+95) - (added) clang/lib/AST/ByteCode/BitcastBuffer.h (+86) - (modified) clang/lib/AST/ByteCode/Boolean.h (+1-3) - (modified) clang/lib/AST/ByteCode/Integral.h (+1) - (modified) clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp (+105-161) - (modified) clang/lib/AST/CMakeLists.txt (+1) - (added) clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp (+437) - (modified) clang/test/AST/ByteCode/builtin-bit-cast.cpp (+19-90) - (added) clang/unittests/AST/ByteCode/BitcastBuffer.cpp (+87) - (modified) clang/unittests/AST/ByteCode/CMakeLists.txt (+1) ``````````diff diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp new file mode 100644 index 00000000000000..3793e0f4f2dbe2 --- /dev/null +++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp @@ -0,0 +1,95 @@ +//===-------------------- Bitcastbuffer.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "BitcastBuffer.h" + +using namespace clang; +using namespace clang::interp; + +/// Returns the value of the bit in the given sequence of bytes. 
+static inline bool bitof(const std::byte *B, Bits BitIndex) { + return (B[BitIndex.roundToBytes()] & + (std::byte{1} << (BitIndex.getOffsetInByte()))) != std::byte{0}; +} + +void BitcastBuffer::pushData(const std::byte *In, Bits BitOffset, Bits BitWidth, + Endian TargetEndianness) { + for (unsigned It = 0; It != BitWidth.getQuantity(); ++It) { + bool BitValue = bitof(In, Bits(It)); + if (!BitValue) + continue; + + Bits DstBit; + if (TargetEndianness == Endian::Little) + DstBit = BitOffset + Bits(It); + else + DstBit = size() - BitOffset - BitWidth + Bits(It); + + size_t DstByte = DstBit.roundToBytes(); + Data[DstByte] |= std::byte{1} << DstBit.getOffsetInByte(); + } +} + +std::unique_ptr<std::byte[]> +BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth, + Endian TargetEndianness) const { + assert(BitWidth.getQuantity() <= FullBitWidth.getQuantity()); + assert(FullBitWidth.isFullByte()); + auto Out = std::make_unique<std::byte[]>(FullBitWidth.roundToBytes()); + + for (unsigned It = 0; It != BitWidth.getQuantity(); ++It) { + Bits BitIndex; + if (TargetEndianness == Endian::Little) + BitIndex = BitOffset + Bits(It); + else + BitIndex = size() - BitWidth - BitOffset + Bits(It); + + bool BitValue = bitof(Data.get(), BitIndex); + if (!BitValue) + continue; + + Bits DstBit = Bits(It); + size_t DstByte = DstBit.roundToBytes(); + Out[DstByte] |= std::byte{1} << DstBit.getOffsetInByte(); + } + + return Out; +} + +#if 0 + template<typename T> + static std::string hex(T t) { + std::stringstream stream; + stream << std::hex << (int)t; + return std::string(stream.str()); + } + + + void BitcastBuffer::dump(bool AsHex = true) const { + llvm::errs() << "LSB\n "; + unsigned LineLength = 0; + for (unsigned I = 0; I != (FinalBitSize / 8); ++I) { + std::byte B = Data[I]; + if (AsHex) { + std::stringstream stream; + stream << std::hex << (int)B; + llvm::errs() << stream.str(); + LineLength += stream.str().size() + 1; + } else { + llvm::errs() << 
std::bitset<8>((int)B).to_string(); + LineLength += 8 + 1; + // llvm::errs() << (int)B; + } + llvm::errs() << ' '; + } + llvm::errs() << '\n'; + + for (unsigned I = 0; I != LineLength; ++I) + llvm::errs() << ' '; + llvm::errs() << "MSB\n"; + } +#endif diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h new file mode 100644 index 00000000000000..19a7e1151df4c3 --- /dev/null +++ b/clang/lib/AST/ByteCode/BitcastBuffer.h @@ -0,0 +1,86 @@ +//===--------------------- BitcastBuffer.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H +#define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H + +#include <cassert> +#include <cstddef> +#include <memory> + +namespace clang { +namespace interp { + +enum class Endian { Little, Big }; + +/// A quantity in bits. +struct Bits { + size_t N = 0; + Bits() = default; + static Bits zero() { return Bits(0); } + explicit Bits(size_t Quantity) : N(Quantity) {} + size_t getQuantity() const { return N; } + size_t roundToBytes() const { return N / 8; } + size_t getOffsetInByte() const { return N % 8; } + bool isFullByte() const { return N % 8 == 0; } + bool nonZero() const { return N != 0; } + + Bits operator-(Bits Other) { return Bits(N - Other.N); } + Bits operator+(Bits Other) { return Bits(N + Other.N); } + Bits operator+=(size_t O) { + N += O; + return *this; + } +}; + +/// A quantity in bytes. +struct Bytes { + size_t N; + explicit Bytes(size_t Quantity) : N(Quantity) {} + size_t getQuantity() const { return N; } + Bits toBits() const { return Bits(N * 8); } +}; + +/// Track what bits have been initialized to known values and which ones +/// have indeterminate value. 
+struct BitcastBuffer { + Bits FinalBitSize; + std::unique_ptr<std::byte[]> Data; + + BitcastBuffer(Bits FinalBitSize) : FinalBitSize(FinalBitSize) { + assert(FinalBitSize.isFullByte()); + unsigned ByteSize = FinalBitSize.roundToBytes(); + Data = std::make_unique<std::byte[]>(ByteSize); + } + + /// Returns the buffer size in bits. + Bits size() const { return FinalBitSize; } + + /// Returns \c true if all bits in the buffer have been initialized. + bool allInitialized() const { + // FIXME: Implement. + return true; + } + + /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer. + /// \p TargetEndianness is the endianness of the target we're compiling for. + /// \p In must hold at least \p BitWidth many bits. + void pushData(const std::byte *In, Bits BitOffset, Bits BitWidth, + Endian TargetEndianness); + + /// Copy \p BitWidth bits at offset \p BitOffset from the buffer. + /// \p TargetEndianness is the endianness of the target we're compiling for. + /// + /// The returned output holds exactly (\p FullBitWidth / 8) bytes. + std::unique_ptr<std::byte[]> copyBits(Bits BitOffset, Bits BitWidth, + Bits FullBitWidth, + Endian TargetEndianness) const; +}; + +} // namespace interp +} // namespace clang +#endif diff --git a/clang/lib/AST/ByteCode/Boolean.h b/clang/lib/AST/ByteCode/Boolean.h index 78d75e75c7531a..8380e85865ac55 100644 --- a/clang/lib/AST/ByteCode/Boolean.h +++ b/clang/lib/AST/ByteCode/Boolean.h @@ -82,9 +82,7 @@ class Boolean final { Boolean truncate(unsigned TruncBits) const { return *this; } static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) { - // Boolean width is currently always 8 for all supported targets. If this - // changes we need to get the bool width from the target info. - assert(BitWidth == 8); + // Just load the first byte. 
bool Val = static_cast<bool>(*Buff); return Boolean(Val); } diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index ca3674263aef4f..bb1688a8a7622c 100644 --- a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -181,6 +181,7 @@ template <unsigned Bits, bool Signed> class Integral final { } Integral truncate(unsigned TruncBits) const { + assert(TruncBits >= 1); if (TruncBits >= Bits) return *this; const ReprT BitMask = (ReprT(1) << ReprT(TruncBits)) - 1; diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index b1230f92ddf1d4..2957b8a25ab958 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "InterpBuiltinBitCast.h" +#include "BitcastBuffer.h" #include "Boolean.h" #include "Context.h" #include "Floating.h" @@ -21,9 +22,19 @@ using namespace clang; using namespace clang::interp; +/// Implement __builtin_bit_cast and related operations. +/// Since our internal representation for data is more complex than +/// something we can simply memcpy or memcmp, we first bitcast all the data +/// into a buffer, which we then later use to copy the data into the target. + +// TODO: +// - Try to minimize heap allocations. +// - Optimize the common case of only pushing and pulling full +// bytes to/from the buffer. + /// Used to iterate over pointer fields. 
using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, - size_t BitOffset, bool PackedBools)>; + Bits BitOffset, bool PackedBools)>; #define BITCAST_TYPE_SWITCH(Expr, B) \ do { \ @@ -61,116 +72,44 @@ using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, } \ } while (0) -static bool bitof(std::byte B, unsigned BitIndex) { - return (B & (std::byte{1} << BitIndex)) != std::byte{0}; -} - static void swapBytes(std::byte *M, size_t N) { for (size_t I = 0; I != (N / 2); ++I) std::swap(M[I], M[N - 1 - I]); } -/// Track what bits have been initialized to known values and which ones -/// have indeterminate value. -/// All offsets are in bits. -struct BitcastBuffer { - size_t SizeInBits = 0; - llvm::SmallVector<std::byte> Data; - - BitcastBuffer() = default; - - size_t size() const { return SizeInBits; } - - const std::byte *data() const { return Data.data(); } - - std::byte *getBytes(unsigned BitOffset) const { - assert(BitOffset % 8 == 0); - assert(BitOffset < SizeInBits); - return const_cast<std::byte *>(data() + (BitOffset / 8)); - } - - bool allInitialized() const { - // FIXME: Implement. - return true; - } - - bool atByteBoundary() const { return (Data.size() * 8) == SizeInBits; } - - void pushBit(bool Value) { - if (atByteBoundary()) - Data.push_back(std::byte{0}); - - if (Value) - Data.back() |= (std::byte{1} << (SizeInBits % 8)); - ++SizeInBits; - } - - void pushData(const std::byte *data, size_t BitWidth, bool BigEndianTarget) { - bool OnlyFullBytes = BitWidth % 8 == 0; - unsigned NBytes = BitWidth / 8; - - size_t BitsHandled = 0; - // Read all full bytes first - for (size_t I = 0; I != NBytes; ++I) { - std::byte B = - BigEndianTarget ? data[NBytes - OnlyFullBytes - I] : data[I]; - for (unsigned X = 0; X != 8; ++X) { - pushBit(bitof(B, X)); - ++BitsHandled; - } - } - - if (BitsHandled == BitWidth) - return; - - // Rest of the bits. - assert((BitWidth - BitsHandled) < 8); - std::byte B = BigEndianTarget ? 
data[0] : data[NBytes]; - for (size_t I = 0, E = (BitWidth - BitsHandled); I != E; ++I) { - pushBit(bitof(B, I)); - ++BitsHandled; - } - - assert(BitsHandled == BitWidth); - } -}; - -/// We use this to recursively iterate over all fields and elemends of a pointer +/// We use this to recursively iterate over all fields and elements of a pointer /// and extract relevant data for a bitcast. -static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, +static bool enumerateData(const Pointer &P, const Context &Ctx, Bits Offset, DataFunc F) { const Descriptor *FieldDesc = P.getFieldDesc(); assert(FieldDesc); // Primitives. if (FieldDesc->isPrimitive()) - return F(P, FieldDesc->getPrimType(), Offset, false); + return F(P, FieldDesc->getPrimType(), Offset, /*PackedBools=*/false); // Primitive arrays. if (FieldDesc->isPrimitiveArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); PrimType ElemT = *Ctx.classify(ElemType); // Special case, since the bools here are packed. bool PackedBools = FieldDesc->getType()->isExtVectorBoolType(); + unsigned NumElems = FieldDesc->getNumElems(); bool Ok = true; - for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; - Ok = Ok && F(P.atIndex(Index), ElemT, Offset, PackedBools); - Offset += ElemSizeInBits; + for (unsigned I = 0; I != NumElems; ++I) { + Ok = Ok && F(P.atIndex(I), ElemT, Offset, PackedBools); + Offset += PackedBools ? 1 : ElemSizeInBits; } return Ok; } // Composite arrays. 
if (FieldDesc->isCompositeArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; - enumerateData(P.atIndex(Index).narrow(), Ctx, Offset, F); + enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F); Offset += ElemSizeInBits; } return true; @@ -178,39 +117,27 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, // Records. if (FieldDesc->isRecord()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); const Record *R = FieldDesc->ElemRecord; const ASTRecordLayout &Layout = Ctx.getASTContext().getASTRecordLayout(R->getDecl()); bool Ok = true; - auto enumerateFields = [&]() -> void { - for (unsigned I = 0, N = R->getNumFields(); I != N; ++I) { - const Record::Field *Fi = - R->getField(BigEndianTarget ? (N - 1 - I) : I); - Pointer Elem = P.atField(Fi->Offset); - size_t BitOffset = - Offset + Layout.getFieldOffset(Fi->Decl->getFieldIndex()); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - auto enumerateBases = [&]() -> void { - for (unsigned I = 0, N = R->getNumBases(); I != N; ++I) { - const Record::Base *B = R->getBase(BigEndianTarget ? 
(N - 1 - I) : I); - Pointer Elem = P.atField(B->Offset); - CharUnits ByteOffset = - Layout.getBaseClassOffset(cast<CXXRecordDecl>(B->Decl)); - size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - - if (BigEndianTarget) { - enumerateFields(); - enumerateBases(); - } else { - enumerateBases(); - enumerateFields(); + for (const Record::Field &Fi : R->fields()) { + Pointer Elem = P.atField(Fi.Offset); + Bits BitOffset = + Offset + Bits(Layout.getFieldOffset(Fi.Decl->getFieldIndex())); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + for (const Record::Base &B : R->bases()) { + Pointer Elem = P.atField(B.Offset); + CharUnits ByteOffset = + Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); + Bits BitOffset = Offset + Bits(Ctx.getASTContext().toBits(ByteOffset)); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + // FIXME: We should only (need to) do this when bitcasting OUT of the + // buffer, not when copying data into it. + if (Ok) + Elem.initialize(); } return Ok; @@ -221,7 +148,7 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, static bool enumeratePointerFields(const Pointer &P, const Context &Ctx, DataFunc F) { - return enumerateData(P, Ctx, 0, F); + return enumerateData(P, Ctx, Bits::zero(), F); } // This function is constexpr if and only if To, From, and the types of @@ -295,13 +222,12 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, BitcastBuffer &Buffer, bool ReturnOnUninit) { const ASTContext &ASTCtx = Ctx.getASTContext(); - bool SwapData = (ASTCtx.getTargetInfo().isLittleEndian() != - llvm::sys::IsLittleEndianHost); - bool BigEndianTarget = ASTCtx.getTargetInfo().isBigEndian(); + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? 
Endian::Little : Endian::Big; return enumeratePointerFields( FromPtr, Ctx, - [&](const Pointer &P, PrimType T, size_t BitOffset, + [&](const Pointer &P, PrimType T, Bits BitOffset, bool PackedBools) -> bool { if (!P.isInitialized()) { assert(false && "Implement uninitialized value tracking"); @@ -314,43 +240,39 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, assert(false && "Implement casting to pointer types"); CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); - unsigned BitWidth = ASTCtx.toBits(ObjectReprChars); - llvm::SmallVector<std::byte> Buff(ObjectReprChars.getQuantity()); + Bits BitWidth = Bits(ASTCtx.toBits(ObjectReprChars)); + Bits FullBitWidth = BitWidth; + auto Buff = + std::make_unique<std::byte[]>(ObjectReprChars.getQuantity()); // Work around floating point types that contain unused padding bytes. // This is really just `long double` on x86, which is the only // fundamental type with padding bytes. if (T == PT_Float) { const Floating &F = P.deref<Floating>(); - unsigned NumBits = - llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics()); - assert(NumBits % 8 == 0); - assert(NumBits <= (ObjectReprChars.getQuantity() * 8)); - F.bitcastToMemory(Buff.data()); + Bits NumBits = Bits( + llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics())); + assert(NumBits.isFullByte()); + assert(NumBits.getQuantity() <= FullBitWidth.getQuantity()); + F.bitcastToMemory(Buff.get()); // Now, only (maybe) swap the actual size of the float, excluding the // padding bits. 
- if (SwapData) - swapBytes(Buff.data(), NumBits / 8); + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), NumBits.roundToBytes()); } else { if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) - BitWidth = FD->getBitWidthValue(ASTCtx); + BitWidth = Bits(std::min(FD->getBitWidthValue(ASTCtx), + (unsigned)FullBitWidth.getQuantity())); else if (T == PT_Bool && PackedBools) - BitWidth = 1; - - BITCAST_TYPE_SWITCH(T, { - T Val = P.deref<T>(); - Val.bitcastToMemory(Buff.data()); - }); - if (SwapData) - swapBytes(Buff.data(), ObjectReprChars.getQuantity()); - } + BitWidth = Bits(1); - if (BitWidth != (Buff.size() * 8) && BigEndianTarget) { - Buffer.pushData(Buff.data() + (Buff.size() - 1 - (BitWidth / 8)), - BitWidth, BigEndianTarget); - } else { - Buffer.pushData(Buff.data(), BitWidth, BigEndianTarget); + BITCAST_TYPE_SWITCH(T, { P.deref<T>().bitcastToMemory(Buff.get()); }); + + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), FullBitWidth.roundToBytes()); } + + Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness); return true; }); } @@ -362,16 +284,21 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, assert(Ptr.isBlockPointer()); assert(Buff); - BitcastBuffer Buffer; + Bits BitSize = Bytes(BuffSize).toBits(); + BitcastBuffer Buffer(BitSize); if (!CheckBitcastType(S, OpPC, Ptr.getType(), /*IsToType=*/false)) return false; bool Success = readPointerToBuffer(S.getContext(), Ptr, Buffer, /*ReturnOnUninit=*/false); - assert(Buffer.size() == BuffSize * 8); - HasIndeterminateBits = !Buffer.allInitialized(); - std::memcpy(Buff, Buffer.data(), BuffSize); + + const... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/117179 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits