https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/116843
>From caf66b089176d1952fa8935db3532eb3ef112b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 8 Nov 2024 14:37:56 +0100 Subject: [PATCH] [clang][bytecode] Handle bitcasts involving bitfields --- clang/lib/AST/ByteCode/BitcastBuffer.h | 122 +++++ clang/lib/AST/ByteCode/Boolean.h | 4 +- clang/lib/AST/ByteCode/Integral.h | 1 + .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 247 ++++------ .../ByteCode/builtin-bit-cast-bitfields.cpp | 433 ++++++++++++++++++ clang/test/AST/ByteCode/builtin-bit-cast.cpp | 98 +--- .../unittests/AST/ByteCode/BitcastBuffer.cpp | 80 ++++ clang/unittests/AST/ByteCode/CMakeLists.txt | 1 + 8 files changed, 744 insertions(+), 242 deletions(-) create mode 100644 clang/lib/AST/ByteCode/BitcastBuffer.h create mode 100644 clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp create mode 100644 clang/unittests/AST/ByteCode/BitcastBuffer.cpp diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h new file mode 100644 index 00000000000000..62898f400afc56 --- /dev/null +++ b/clang/lib/AST/ByteCode/BitcastBuffer.h @@ -0,0 +1,122 @@ +//===--------------------- BitcastBuffer.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H +#define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H + +#include <cassert> +#include <memory> + +enum class Endian { Little, Big }; + +static inline bool bitof(std::byte B, unsigned BitIndex) { + assert(BitIndex < 8); + return (B & (std::byte{1} << BitIndex)) != std::byte{0}; +} + +static inline bool fullByte(unsigned N) { return N % 8 == 0; } + +/// Track what bits have been initialized to known values and which ones +/// have indeterminate value. +/// All offsets are in bits. +struct BitcastBuffer { + size_t FinalBitSize = 0; + std::unique_ptr<std::byte[]> Data; + + BitcastBuffer(size_t FinalBitSize) : FinalBitSize(FinalBitSize) { + assert(fullByte(FinalBitSize)); + unsigned ByteSize = FinalBitSize / 8; + Data = std::make_unique<std::byte[]>(ByteSize); + } + + size_t size() const { return FinalBitSize; } + + bool allInitialized() const { + // FIXME: Implement. + return true; + } + + void pushData(const std::byte *data, size_t BitOffset, size_t BitWidth, + Endian DataEndianness) { + for (unsigned It = 0; It != BitWidth; ++It) { + bool BitValue; + BitValue = bitof(data[It / 8], It % 8); + if (!BitValue) + continue; + + unsigned DstBit; + if (DataEndianness == Endian::Little) + DstBit = BitOffset + It; + else + DstBit = size() - BitOffset - BitWidth + It; + + unsigned DstByte = (DstBit / 8); + Data[DstByte] |= std::byte{1} << (DstBit % 8); + } + } + + std::unique_ptr<std::byte[]> copyBits(unsigned BitOffset, unsigned BitWidth, + unsigned FullBitWidth, + Endian DataEndianness) const { + assert(BitWidth <= FullBitWidth); + assert(fullByte(FullBitWidth)); + auto Out = std::make_unique<std::byte[]>(FullBitWidth / 8); + + for (unsigned It = 0; It != BitWidth; ++It) { + unsigned BitIndex; + if (DataEndianness == Endian::Little) + BitIndex = BitOffset + It; + else + BitIndex = size() - BitWidth - BitOffset + It; + + bool BitValue = bitof(Data[BitIndex / 8], BitIndex % 8); + if (!BitValue) + continue; + unsigned DstBit = It; + unsigned DstByte = (DstBit / 8); + Out[DstByte] |= std::byte{1} << (DstBit % 8); + } + + return Out; + } + +#if 0 + template<typename T> + static std::string hex(T t) { + std::stringstream stream; + stream << std::hex << (int)t; + return std::string(stream.str()); + } + + + void dump(bool AsHex = true) const { + llvm::errs() << "LSB\n "; + unsigned LineLength = 0; + for (unsigned I = 0; I != (FinalBitSize / 8); ++I) { + std::byte B = Data[I]; + if (AsHex) { + std::stringstream stream; + stream << std::hex << (int)B; + llvm::errs() << stream.str(); + LineLength += stream.str().size() + 1; + } else { + llvm::errs() << std::bitset<8>((int)B).to_string(); + LineLength += 8 + 1; + // llvm::errs() << (int)B; + } + llvm::errs() << ' '; + } + llvm::errs() << '\n'; + + for (unsigned I = 0; I != LineLength; ++I) + llvm::errs() << ' '; + llvm::errs() << "MSB\n"; + } +#endif +}; + +#endif diff --git a/clang/lib/AST/ByteCode/Boolean.h b/clang/lib/AST/ByteCode/Boolean.h index 78d75e75c7531a..8380e85865ac55 100644 --- a/clang/lib/AST/ByteCode/Boolean.h +++ b/clang/lib/AST/ByteCode/Boolean.h @@ -82,9 +82,7 @@ class Boolean final { Boolean truncate(unsigned TruncBits) const { return *this; } static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) { - // Boolean width is currently always 8 for all supported targets. If this - // changes we need to get the bool width from the target info. - assert(BitWidth == 8); + // Just load the first byte. bool Val = static_cast<bool>(*Buff); return Boolean(Val); } diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index ca3674263aef4f..bb1688a8a7622c 100644 --- a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -181,6 +181,7 @@ template <unsigned Bits, bool Signed> class Integral final { } Integral truncate(unsigned TruncBits) const { + assert(TruncBits >= 1); if (TruncBits >= Bits) return *this; const ReprT BitMask = (ReprT(1) << ReprT(TruncBits)) - 1; diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index 7e8853d3469317..ac60178467feb3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "InterpBuiltinBitCast.h" +#include "BitcastBuffer.h" #include "Boolean.h" #include "Context.h" #include "Floating.h" @@ -17,10 +18,21 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/TargetInfo.h" +#include <cmath> using namespace clang; using namespace clang::interp; +/// Implement __builtin_bit_cast and related operations. +/// Since our internal representation for data is more complex than +/// something we can simply memcpy or memcmp, we first bitcast all the data +/// into a buffer, which we then later use to copy the data into the target. + +// TODO: +// - Try to minimize heap allocations. +// - Optimize the common case of only pushing and pulling full +// bytes to/from the buffer. + /// Used to iterate over pointer fields. using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t BitOffset, bool PackedBools)>; @@ -61,81 +73,12 @@ using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, } \ } while (0) -static bool bitof(std::byte B, unsigned BitIndex) { - return (B & (std::byte{1} << BitIndex)) != std::byte{0}; -} - static void swapBytes(std::byte *M, size_t N) { for (size_t I = 0; I != (N / 2); ++I) std::swap(M[I], M[N - 1 - I]); } -/// Track what bits have been initialized to known values and which ones -/// have indeterminate value. -/// All offsets are in bits. -struct BitcastBuffer { - size_t SizeInBits = 0; - llvm::SmallVector<std::byte> Data; - - BitcastBuffer() = default; - - size_t size() const { return SizeInBits; } - - const std::byte *data() const { return Data.data(); } - - std::byte *getBytes(unsigned BitOffset) const { - assert(BitOffset % 8 == 0); - assert(BitOffset < SizeInBits); - return const_cast<std::byte *>(data() + (BitOffset / 8)); - } - - bool allInitialized() const { - // FIXME: Implement. - return true; - } - - bool atByteBoundary() const { return (Data.size() * 8) == SizeInBits; } - - void pushBit(bool Value) { - if (atByteBoundary()) - Data.push_back(std::byte{0}); - - if (Value) - Data.back() |= (std::byte{1} << (SizeInBits % 8)); - ++SizeInBits; - } - - void pushData(const std::byte *data, size_t BitWidth, bool BigEndianTarget) { - bool OnlyFullBytes = BitWidth % 8 == 0; - unsigned NBytes = BitWidth / 8; - - size_t BitsHandled = 0; - // Read all full bytes first - for (size_t I = 0; I != NBytes; ++I) { - std::byte B = - BigEndianTarget ? data[NBytes - OnlyFullBytes - I] : data[I]; - for (unsigned X = 0; X != 8; ++X) { - pushBit(bitof(B, X)); - ++BitsHandled; - } - } - - if (BitsHandled == BitWidth) - return; - - // Rest of the bits. - assert((BitWidth - BitsHandled) < 8); - std::byte B = BigEndianTarget ? data[0] : data[NBytes]; - for (size_t I = 0, E = (BitWidth - BitsHandled); I != E; ++I) { - pushBit(bitof(B, I)); - ++BitsHandled; - } - - assert(BitsHandled == BitWidth); - } -}; - -/// We use this to recursively iterate over all fields and elemends of a pointer +/// We use this to recursively iterate over all fields and elements of a pointer /// and extract relevant data for a bitcast. static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, DataFunc F) { @@ -144,32 +87,31 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, // Primitives. if (FieldDesc->isPrimitive()) - return F(P, FieldDesc->getPrimType(), Offset, false); + return F(P, FieldDesc->getPrimType(), Offset, /*PackedBools=*/false); // Primitive arrays. if (FieldDesc->isPrimitiveArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); PrimType ElemT = *Ctx.classify(ElemType); // Special case, since the bools here are packed. bool PackedBools = FieldDesc->getType()->isExtVectorBoolType(); + unsigned NumElems = FieldDesc->getNumElems(); bool Ok = true; - for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; + for (unsigned I = 0; I != NumElems; ++I) { + unsigned Index = I; Ok = Ok && F(P.atIndex(Index), ElemT, Offset, PackedBools); - Offset += ElemSizeInBits; + Offset += PackedBools ? 1 : ElemSizeInBits; } return Ok; } // Composite arrays. if (FieldDesc->isCompositeArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; + unsigned Index = I; enumerateData(P.atIndex(Index).narrow(), Ctx, Offset, F); Offset += ElemSizeInBits; } @@ -178,39 +120,23 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, // Records. if (FieldDesc->isRecord()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); const Record *R = FieldDesc->ElemRecord; const ASTRecordLayout &Layout = Ctx.getASTContext().getASTRecordLayout(R->getDecl()); bool Ok = true; - auto enumerateFields = [&]() -> void { - for (unsigned I = 0, N = R->getNumFields(); I != N; ++I) { - const Record::Field *Fi = - R->getField(BigEndianTarget ? (N - 1 - I) : I); - Pointer Elem = P.atField(Fi->Offset); - size_t BitOffset = - Offset + Layout.getFieldOffset(Fi->Decl->getFieldIndex()); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - auto enumerateBases = [&]() -> void { - for (unsigned I = 0, N = R->getNumBases(); I != N; ++I) { - const Record::Base *B = R->getBase(BigEndianTarget ? (N - 1 - I) : I); - Pointer Elem = P.atField(B->Offset); - CharUnits ByteOffset = - Layout.getBaseClassOffset(cast<CXXRecordDecl>(B->Decl)); - size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - - if (BigEndianTarget) { - enumerateFields(); - enumerateBases(); - } else { - enumerateBases(); - enumerateFields(); + for (const Record::Field &Fi : R->fields()) { + Pointer Elem = P.atField(Fi.Offset); + size_t BitOffset = + Offset + Layout.getFieldOffset(Fi.Decl->getFieldIndex()); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + for (const Record::Base &B : R->bases()) { + Pointer Elem = P.atField(B.Offset); + CharUnits ByteOffset = + Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); + size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); } return Ok; @@ -295,27 +221,28 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, BitcastBuffer &Buffer, bool ReturnOnUninit) { const ASTContext &ASTCtx = Ctx.getASTContext(); - bool SwapData = (ASTCtx.getTargetInfo().isLittleEndian() != - llvm::sys::IsLittleEndianHost); - bool BigEndianTarget = ASTCtx.getTargetInfo().isBigEndian(); + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; return enumeratePointerFields( FromPtr, Ctx, [&](const Pointer &P, PrimType T, size_t BitOffset, bool PackedBools) -> bool { - if (!P.isInitialized()) { - assert(false && "Implement uninitialized value tracking"); - return ReturnOnUninit; - } + // if (!P.isInitialized()) { + // assert(false && "Implement uninitialized value tracking"); + // return ReturnOnUninit; + // } - assert(P.isInitialized()); + // assert(P.isInitialized()); // nullptr_t is a PT_Ptr for us, but it's still not std::is_pointer_v. if (T == PT_Ptr) assert(false && "Implement casting to pointer types"); CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); unsigned BitWidth = ASTCtx.toBits(ObjectReprChars); - llvm::SmallVector<std::byte> Buff(ObjectReprChars.getQuantity()); + unsigned FullBitWidth = BitWidth; + auto Buff = + std::make_unique<std::byte[]>(ObjectReprChars.getQuantity()); // Work around floating point types that contain unused padding bytes. // This is really just `long double` on x86, which is the only // fundamental type with padding bytes. @@ -323,34 +250,27 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, const Floating &F = P.deref<Floating>(); unsigned NumBits = llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics()); - assert(NumBits % 8 == 0); - assert(NumBits <= (ObjectReprChars.getQuantity() * 8)); - F.bitcastToMemory(Buff.data()); + assert(fullByte(NumBits)); + assert(NumBits <= FullBitWidth); + F.bitcastToMemory(Buff.get()); // Now, only (maybe) swap the actual size of the float, excluding the // padding bits. - if (SwapData) - swapBytes(Buff.data(), NumBits / 8); + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), NumBits / 8); } else { if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) - BitWidth = FD->getBitWidthValue(ASTCtx); + BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); else if (T == PT_Bool && PackedBools) BitWidth = 1; - BITCAST_TYPE_SWITCH(T, { - T Val = P.deref<T>(); - Val.bitcastToMemory(Buff.data()); - }); - if (SwapData) - swapBytes(Buff.data(), ObjectReprChars.getQuantity()); - } + BITCAST_TYPE_SWITCH(T, { P.deref<T>().bitcastToMemory(Buff.get()); }); - if (BitWidth != (Buff.size() * 8) && BigEndianTarget) { - Buffer.pushData(Buff.data() + (Buff.size() - 1 - (BitWidth / 8)), - BitWidth, BigEndianTarget); - } else { - Buffer.pushData(Buff.data(), BitWidth, BigEndianTarget); + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), FullBitWidth / 8); } + + Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness); return true; }); } @@ -362,7 +282,7 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, assert(Ptr.isBlockPointer()); assert(Buff); - BitcastBuffer Buffer; + BitcastBuffer Buffer(BuffSize * 8); if (!CheckBitcastType(S, OpPC, Ptr.getType(), /*IsToType=*/false)) return false; @@ -371,13 +291,20 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, assert(Buffer.size() == BuffSize * 8); HasIndeterminateBits = !Buffer.allInitialized(); - std::memcpy(Buff, Buffer.data(), BuffSize); + + const ASTContext &ASTCtx = S.getASTContext(); + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; + auto B = Buffer.copyBits(0, BuffSize * 8, BuffSize * 8, TargetEndianness); + + std::memcpy(Buff, B.get(), BuffSize); if (llvm::sys::IsBigEndianHost) swapBytes(Buff, BuffSize); return Success; } +/// --------------------------------------------------------------------------------------------------------------------- bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, const Pointer &FromPtr, Pointer &ToPtr) { @@ -394,43 +321,59 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, if (!CheckBitcastType(S, OpPC, ToType, /*IsToType=*/true)) return false; - BitcastBuffer Buffer; + const ASTContext &ASTCtx = S.getASTContext(); + + CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(ToType); + BitcastBuffer Buffer(ASTCtx.toBits(ObjectReprChars)); readPointerToBuffer(S.getContext(), FromPtr, Buffer, /*ReturnOnUninit=*/false); // Now read the values out of the buffer again and into ToPtr. - const ASTContext &ASTCtx = S.getASTContext(); - size_t BitOffset = 0; + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; bool Success = enumeratePointerFields( ToPtr, S.getContext(), - [&](const Pointer &P, PrimType T, size_t _, bool PackedBools) -> bool { + [&](const Pointer &P, PrimType T, size_t BitOffset, + bool PackedBools) -> bool { + CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); + unsigned FullBitWidth = ASTCtx.toBits(ObjectReprChars); if (T == PT_Float) { - CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); const auto &Semantics = ASTCtx.getFloatTypeSemantics(P.getType()); unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics); - assert(NumBits % 8 == 0); - assert(NumBits <= ASTCtx.toBits(ObjectReprChars)); - std::byte *M = Buffer.getBytes(BitOffset); + assert(fullByte(NumBits)); + assert(NumBits <= FullBitWidth); + auto M = Buffer.copyBits(BitOffset, NumBits, FullBitWidth, + TargetEndianness); if (llvm::sys::IsBigEndianHost) - swapBytes(M, NumBits / 8); + swapBytes(M.get(), NumBits / 8); - P.deref<Floating>() = Floating::bitcastFromMemory(M, Semantics); + P.deref<Floating>() = Floating::bitcastFromMemory(M.get(), Semantics); P.initialize(); - BitOffset += ASTCtx.toBits(ObjectReprChars); return true; } - BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { - std::byte *M = Buffer.getBytes(BitOffset); + unsigned BitWidth; + if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) + BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); + else if (T == PT_Bool && PackedBools) + BitWidth = 1; + else + BitWidth = ASTCtx.toBits(ObjectReprChars); - if (llvm::sys::IsBigEndianHost) - swapBytes(M, T::bitWidth() / 8); + auto Memory = Buffer.copyBits(BitOffset, BitWidth, FullBitWidth, + TargetEndianness); + if (llvm::sys::IsBigEndianHost) + swapBytes(Memory.get(), FullBitWidth / 8); - P.deref<T>() = T::bitcastFromMemory(M, T::bitWidth()); - P.initialize(); - BitOffset += T::bitWidth(); + BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { + if (BitWidth > 0) + P.deref<T>() = T::bitcastFromMemory(Memory.get(), T::bitWidth()) + .truncate(BitWidth); + else + P.deref<T>() = T::zero(); }); + P.initialize(); return true; }); diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp new file mode 100644 index 00000000000000..73fce141a06e89 --- /dev/null +++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp @@ -0,0 +1,433 @@ +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64-unknown-unknown -mabi=ieeelongdouble %s + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define LITTLE_END 1 +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define LITTLE_END 0 +#else +# error "huh?" +#endif + +typedef decltype(nullptr) nullptr_t; +typedef __INTPTR_TYPE__ intptr_t; +typedef unsigned __INT16_TYPE__ uint16_t; +typedef unsigned __INT32_TYPE__ uint32_t; +typedef unsigned __INT64_TYPE__ uint64_t; + +static_assert(sizeof(int) == 4); +static_assert(sizeof(long long) == 8); + +template <class To, class From> +constexpr To bit_cast(const From &from) { + static_assert(sizeof(To) == sizeof(From)); + return __builtin_bit_cast(To, from); +} + +template <class Intermediate, class Init> +constexpr bool check_round_trip(const Init &init) { + return bit_cast<Init>(bit_cast<Intermediate>(init)) == init; +} + +template <class Intermediate, class Init> +constexpr Init round_trip(const Init &init) { + return bit_cast<Init>(bit_cast<Intermediate>(init)); +} + +namespace std { +enum byte : unsigned char {}; +} // namespace std + +template <int N, typename T = unsigned char, int Pad = 0> +struct bits { + T : Pad; + T bits : N; + + constexpr bool operator==(const T& rhs) const { + return bits == rhs; + } +}; + +template <int N, typename T, int P> +constexpr bool operator==(const struct bits<N, T, P>& lhs, const struct bits<N, T, P>& rhs) { + return lhs.bits == rhs.bits; +} + +template<int N> +struct bytes { + using size_t = unsigned int; + unsigned char d[N]; + + constexpr unsigned char operator[](size_t index) { + if (index < N) + return d[index]; + return -1; + } +}; + +namespace Sanity { + /// This is just one byte, and we extract 2 bits from it. + /// + /// 3 is 0000'0011. + /// For both LE and BE, the buffer will contain exactly that + /// byte, unaltered and not reordered in any way. It contains all 8 bits. + static_assert(__builtin_bit_cast(bits<2>, (unsigned char)3) == (LITTLE_END ? 3 : 0)); + + /// Similarly, we have one full byte of data, with the two most-significant + /// bits set: + /// 192 is 1100'0000 + static_assert(__builtin_bit_cast(bits<2>, (unsigned char)192) == (LITTLE_END ? 0 : 3)); + + + /// Here we are instead bitcasting two 1-bits into a destination of 8 bits. + /// On LE, we should pick the two least-significant bits. On BE, the opposite. + /// NOTE: Can't verify this with gcc. + constexpr auto B1 = bits<2>{3}; + static_assert(__builtin_bit_cast(unsigned char, B1) == (LITTLE_END ? 3 : 192)); + + /// This should be 0000'0110. + /// On LE, this should result in 6. + /// On BE, 1100'0000 = 192. + constexpr auto B2 = bits<3>{6}; + static_assert(__builtin_bit_cast(unsigned char, B2) == (LITTLE_END ? 6 : 192)); + + constexpr auto B3 = bits<4>{6}; + static_assert(__builtin_bit_cast(unsigned char, B3) == (LITTLE_END ? 6 : 96)); + + struct B { + std::byte b0 : 4; + std::byte b1 : 4; + }; + + /// We can properly decompose one byte (8 bit) int two 4-bit bitfields. + constexpr struct { unsigned char b0; } T = {0xee}; + constexpr B MB = __builtin_bit_cast(B, T); + static_assert(MB.b0 == 0xe); + static_assert(MB.b1 == 0xe); +} + +namespace BitFields { + struct BitFields { + unsigned a : 2; + unsigned b : 30; + }; + + constexpr unsigned A = __builtin_bit_cast(unsigned, BitFields{3, 16}); + static_assert(A == (LITTLE_END ? 67 : 3221225488)); + + struct S { + unsigned a : 2; + unsigned b : 28; + unsigned c : 2; + }; + + constexpr S s = __builtin_bit_cast(S, 0xFFFFFFFF); + static_assert(s.a == 3); + static_assert(s.b == 268435455); + static_assert(s.c == 3); + + void bitfield_indeterminate() { + struct BF { unsigned char z : 2; }; + enum byte : unsigned char {}; + + constexpr BF bf = {0x3}; + /// Requires bitcasts to composite types. + static_assert(bit_cast<bits<2>>(bf).bits == bf.z); + static_assert(bit_cast<unsigned char>(bf)); + + static_assert(__builtin_bit_cast(byte, bf)); + + struct M { + // ref-note@+1 {{subobject declared here}} + unsigned char mem[sizeof(BF)]; + }; + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{not initialized}} + constexpr M m = bit_cast<M>(bf); + + constexpr auto f = []() constexpr { + // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee}; + constexpr struct { unsigned short b1; unsigned char b0; } B = {0xc0ff, 0xee}; + return bit_cast<bytes<4>>(B); + }; + + static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee); + { + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{read of uninitialized object is not allowed in a constant expression}} + constexpr auto _bad = f()[3]; + } + + struct B { + unsigned short s0 : 8; + unsigned short s1 : 8; + std::byte b0 : 4; + std::byte b1 : 4; + std::byte b2 : 4; + }; + constexpr auto g = [f]() constexpr { + return bit_cast<B>(f()); + }; + static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe); + { + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{read of uninitialized object is not allowed in a constant expression}} + constexpr auto _bad = g().b2; + } + } +} + +namespace BoolVectors { + typedef bool bool32 __attribute__((ext_vector_type(32))); + constexpr auto v = bit_cast<bool32>(0xa1c0ffee); +#if LITTLE_END + static_assert(!v[0]); + static_assert(v[1]); + static_assert(v[2]); + static_assert(v[3]); + static_assert(!v[4]); + static_assert(v[5]); + static_assert(v[6]); + static_assert(v[7]); + + static_assert(v[8]); + static_assert(v[9]); + static_assert(v[10]); + static_assert(v[11]); + static_assert(v[12]); + static_assert(v[13]); + static_assert(v[14]); + static_assert(v[15]); + + static_assert(!v[16]); + static_assert(!v[17]); + static_assert(!v[18]); + static_assert(!v[19]); + static_assert(!v[20]); + static_assert(!v[21]); + static_assert(v[22]); + static_assert(v[23]); + + static_assert(v[24]); + static_assert(!v[25]); + static_assert(!v[26]); + static_assert(!v[27]); + static_assert(!v[28]); + static_assert(v[29]); + static_assert(!v[30]); + static_assert(v[31]); + +#else + static_assert(v[0]); + static_assert(!v[1]); + static_assert(v[2]); + static_assert(!v[3]); + static_assert(!v[4]); + static_assert(!v[5]); + static_assert(!v[6]); + static_assert(v[7]); + + static_assert(v[8]); + static_assert(v[9]); + static_assert(!v[10]); + static_assert(!v[11]); + static_assert(!v[12]); + static_assert(!v[13]); + static_assert(!v[14]); + static_assert(!v[15]); + + static_assert(v[16]); + static_assert(v[17]); + static_assert(v[18]); + static_assert(v[19]); + static_assert(v[20]); + static_assert(v[21]); + static_assert(v[22]); + static_assert(v[23]); + + static_assert(v[24]); + static_assert(v[25]); + static_assert(v[26]); + static_assert(!v[27]); + static_assert(v[28]); + static_assert(v[29]); + static_assert(v[30]); + static_assert(!v[31]); +#endif + + struct pad { + unsigned short s; + unsigned char c; + }; + + constexpr auto p = bit_cast<pad>(v); + static_assert(p.s == (LITTLE_END ? 0xffee : 0xa1c0)); + static_assert(p.c == (LITTLE_END ? 0xc0 : 0xff)); +} + +namespace TwoShorts { + struct B { + unsigned short s0 : 8; + unsigned short s1 : 8; + }; + constexpr struct { unsigned short b1;} T = {0xc0ff}; + constexpr B MB = __builtin_bit_cast(B, T); +#if LITTLE_END + static_assert(MB.s0 == 0xff); + static_assert(MB.s1 == 0xc0); +#else + static_assert(MB.s0 == 0xc0); + static_assert(MB.s1 == 0xff); + +#endif +} + +typedef bool bool8 __attribute__((ext_vector_type(8))); +typedef bool bool9 __attribute__((ext_vector_type(9))); +typedef bool bool16 __attribute__((ext_vector_type(16))); +typedef bool bool17 __attribute__((ext_vector_type(17))); +typedef bool bool32 __attribute__((ext_vector_type(32))); +typedef bool bool128 __attribute__((ext_vector_type(128))); + +static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), ""); +constexpr bool8 b8 = __builtin_bit_cast(bool8, 0x55); // both-error {{'__builtin_bit_cast' source type 'int' does not match destination type 'bool8' (vector of 8 'bool' values) (4 vs 1 bytes)}} +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)), ""); +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)), ""); +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)), ""); + +static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4), ""); + +static_assert(check_round_trip<bool16>(static_cast<short>(0xCAFE)), ""); +static_assert(check_round_trip<bool32>(static_cast<int>(0xCAFEBABE)), ""); +static_assert(check_round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), ""); + +static_assert(bit_cast<bits<8, uint16_t, 7>, uint16_t>(0xcafe) == (LITTLE_END ? 0x95 : 0x7f)); +static_assert(bit_cast<bits<4, uint16_t, 10>, uint16_t>(0xcafe) == (LITTLE_END ? 0x2 : 0xf)); +static_assert(bit_cast<bits<4, uint32_t, 19>, uint32_t>(0xa1cafe) == (LITTLE_END ? 0x4 : 0x5)); + +struct S { + // little endian: + // MSB .... .... LSB + // |y| |x| + // + // big endian + // MSB .... .... LSB + // |x| |y| + + unsigned char x : 4; + unsigned char y : 4; + + constexpr bool operator==(S const &other) const { + return x == other.x && y == other.y; + } +}; + +constexpr S s{0xa, 0xb}; +static_assert(bit_cast<bits<8>>(s) == (LITTLE_END ? 0xba : 0xab)); +static_assert(bit_cast<bits<7>>(s) == (LITTLE_END + ? 0xba & 0x7f + : (0xab & 0xfe) >> 1)); + +static_assert(round_trip<bits<8>>(s) == s); + +struct R { + unsigned int r : 31; + unsigned int : 0; + unsigned int : 32; + constexpr bool operator==(R const &other) const { + return r == other.r; + } + }; +using T = bits<31, signed long long>; +constexpr R r{0x4ac0ffee}; +constexpr T t = bit_cast<T>(r); +static_assert(t == ((0xFFFFFFFF8 << 28) | 0x4ac0ffee)); // sign extension + +static_assert(round_trip<T>(r) == r); +static_assert(round_trip<R>(t) == t); + +struct U { + // expected-warning@+1 {{exceeds the width of its type}} + uint32_t trunc : 33; + uint32_t u : 31; + constexpr bool operator==(U const &other) const { + return trunc == other.trunc && u == other.u; + } +}; +struct V { + uint64_t notrunc : 32; + uint64_t : 1; + uint64_t v : 31; + constexpr bool operator==(V const &other) const { + return notrunc == other.notrunc && v == other.v; + } +}; + +constexpr U u{static_cast<unsigned int>(~0), 0x4ac0ffee}; +constexpr V v = bit_cast<V>(u); +static_assert(v.v == 0x4ac0ffee); + +static_assert(round_trip<V>(u) == u); +static_assert(round_trip<U>(v) == v); + +constexpr auto w = bit_cast<bits<12, unsigned long, 33>>(u); +static_assert(w == (LITTLE_END + ? 0x4ac0ffee & 0xFFF + : (0x4ac0ffee & (0xFFF << (31 - 12))) >> (31-12) + )); + + +namespace NestedStructures { + struct J { + struct { + uint16_t k : 12; + } K; + struct { + uint16_t l : 4; + } L; + }; + + static_assert(sizeof(J) == 4); + constexpr J j = bit_cast<J>(0x8c0ffee5); + + static_assert(j.K.k == (LITTLE_END ? 0xee5 : 0x8c0)); + static_assert(j.L.l == 0xf /* yay symmetry */); + static_assert(bit_cast<bits<4, uint16_t, 16>>(j) == 0xf); + struct N { + bits<12, uint16_t> k; + uint16_t : 16; + }; + static_assert(bit_cast<N>(j).k == j.K.k); + + struct M { + bits<4, uint16_t, 0> m[2]; + constexpr bool operator==(const M& rhs) const { + return m[0] == rhs.m[0] && m[1] == rhs.m[1]; + }; + }; + #if LITTLE_END == 1 + constexpr uint16_t want[2] = {0x5, 0xf}; + #else + constexpr uint16_t want[2] = {0x8000, 0xf000}; + #endif + + static_assert(bit_cast<M>(j) == bit_cast<M>(want)); +} + +namespace Enums { + // ensure we're packed into the top 2 bits + constexpr int pad = LITTLE_END ? 6 : 0; + struct X + { + char : pad; + enum class direction: char { left, right, up, down } direction : 2; + }; + + constexpr X x = { X::direction::down }; + static_assert(bit_cast<bits<2, signed char, pad>>(x) == -1); + static_assert(bit_cast<bits<2, unsigned char, pad>>(x) == 3); + static_assert( + bit_cast<X>((unsigned char)0x40).direction == X::direction::right); +} diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index 60e8c3a615c5e6..95cc5e520a133d 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -164,72 +164,6 @@ namespace bitint { // ref-note {{initializer of 'IB' is not a constant expression}} } -namespace BitFields { - struct BitFields { - unsigned a : 2; - unsigned b : 30; - }; - - constexpr unsigned A = __builtin_bit_cast(unsigned, BitFields{3, 16}); // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{not yet supported}} \ - // ref-note {{declared here}} - static_assert(A == (LITTLE_END ? 67 : 3221225488)); // ref-error {{not an integral constant expression}} \ - // ref-note {{initializer of 'A'}} - - - void bitfield_indeterminate() { - struct BF { unsigned char z : 2; }; - enum byte : unsigned char {}; - - constexpr BF bf = {0x3}; - /// Requires bitcasts to composite types. - // static_assert(bit_cast<bits<2>>(bf).bits == bf.z); - // static_assert(bit_cast<unsigned char>(bf)); - -#if 0 - // static_assert(__builtin_bit_cast(byte, bf)); - - struct M { - // expected-note@+1 {{subobject declared here}} - unsigned char mem[sizeof(BF)]; - }; - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{not initialized}} - constexpr M m = bit_cast<M>(bf); - - constexpr auto f = []() constexpr { - // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee}; - constexpr struct { unsigned short b1; unsigned char b0; } B = {0xc0ff, 0xee}; - return bit_cast<bytes<4>>(B); - }; - - static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee); - { - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{read of uninitialized object is not allowed in a constant expression}} - constexpr auto _bad = f()[3]; - } - - struct B { - unsigned short s0 : 8; - unsigned short s1 : 8; - std::byte b0 : 4; - std::byte b1 : 4; - std::byte b2 : 4; - }; - constexpr auto g = [f]() constexpr { - return bit_cast<B>(f()); - }; - static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe); - { - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{read of uninitialized object is not allowed in a constant expression}} - constexpr auto _bad = g().b2; - } -#endif - } -} - namespace Classes { class A { public: @@ -488,27 +422,6 @@ static_assert(bit_cast<unsigned long long>(test_vector) == (LITTLE_END static_assert(check_round_trip<uint2>(0xCAFEBABE0C05FEFEULL), ""); static_assert(check_round_trip<byte8>(0xCAFEBABE0C05FEFEULL), ""); -typedef bool bool8 __attribute__((ext_vector_type(8))); -typedef bool bool9 __attribute__((ext_vector_type(9))); -typedef bool bool16 __attribute__((ext_vector_type(16))); -typedef bool bool17 __attribute__((ext_vector_type(17))); -typedef bool bool32 __attribute__((ext_vector_type(32))); -typedef bool bool128 __attribute__((ext_vector_type(128))); - -static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), ""); -constexpr bool8 b8 = __builtin_bit_cast(bool8, 0x55); // both-error {{'__builtin_bit_cast' source type 'int' does not match destination type 'bool8' (vector of 8 'bool' values) (4 vs 1 bytes)}} -#if 0 -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)), ""); -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)), ""); -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)), ""); - -static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4), ""); - -static_assert(check_round_trip<bool16>(static_cast<short>(0xCAFE)), ""); -static_assert(check_round_trip<bool32>(static_cast<int>(0xCAFEBABE)), ""); -static_assert(check_round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), ""); -#endif - #if 0 // expected-error@+2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}} // expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}} @@ -537,3 +450,14 @@ namespace test_complex { constexpr double D = __builtin_bit_cast(double, test_float_complex); constexpr int M = __builtin_bit_cast(int, test_int_complex); // both-error {{size of '__builtin_bit_cast' source type 'const _Complex unsigned int' does not match destination type 'int' (8 vs 4 bytes)}} } + + +namespace OversizedBitField { + typedef unsigned __INT16_TYPE__ uint16_t; + typedef unsigned __INT32_TYPE__ uint32_t; + struct S { + uint16_t a : 20; // both-warning {{exceeds the width of its type}} + }; + static_assert(__builtin_bit_cast(S, (uint32_t)32).a == (LITTLE_END ? 32 : 0)); // ref-error {{not an integral constant expression}} \ + // ref-note {{constexpr bit_cast involving bit-field is not yet supported}} +} diff --git a/clang/unittests/AST/ByteCode/BitcastBuffer.cpp b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp new file mode 100644 index 00000000000000..082d814cef2dea --- /dev/null +++ b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp @@ -0,0 +1,80 @@ +#include "../../../lib/AST/ByteCode/BitcastBuffer.h" +#include "clang/AST/ASTContext.h" +#include "gtest/gtest.h" +#include <bitset> +#include <cassert> +#include <cmath> +#include <memory> +#include <string> + +TEST(BitcastBuffer, PushData) { + BitcastBuffer Buff1(sizeof(int) * 8); + + const unsigned V = 0xCAFEBABE; + std::byte Data[sizeof(V)]; + std::memcpy(Data, &V, sizeof(V)); + + Endian HostEndianness = + llvm::sys::IsLittleEndianHost ? Endian::Little : Endian::Big; + + Buff1.pushData(Data, 0, sizeof(V) * 8, HostEndianness); + + // The buffer is in host-endianness. + if (llvm::sys::IsLittleEndianHost) { + ASSERT_EQ(Buff1.Data[0], std::byte{0xbe}); + ASSERT_EQ(Buff1.Data[1], std::byte{0xba}); + ASSERT_EQ(Buff1.Data[2], std::byte{0xfe}); + ASSERT_EQ(Buff1.Data[3], std::byte{0xca}); + } else { + ASSERT_EQ(Buff1.Data[0], std::byte{0xca}); + ASSERT_EQ(Buff1.Data[1], std::byte{0xfe}); + ASSERT_EQ(Buff1.Data[2], std::byte{0xba}); + ASSERT_EQ(Buff1.Data[3], std::byte{0xbe}); + } + + { + unsigned V2; + auto D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + std::memcpy(&V2, D.get(), sizeof(V)); + ASSERT_EQ(V, V2); + + D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + std::memcpy(&V2, D.get(), sizeof(V)); + ASSERT_EQ(V, V2); + } + + BitcastBuffer Buff2(sizeof(int) * 8); + { + short s1 = 0xCAFE; + short s2 = 0xBABE; + std::byte sdata[2]; + + std::memcpy(sdata, &s1, sizeof(s1)); + Buff2.pushData(sdata, 0, sizeof(s1) * 8, HostEndianness); + std::memcpy(sdata, &s2, sizeof(s2)); + Buff2.pushData(sdata, sizeof(s1) * 8, sizeof(s2) * 8, HostEndianness); + } + + if (llvm::sys::IsLittleEndianHost) { + ASSERT_EQ(Buff2.Data[0], std::byte{0xfe}); + ASSERT_EQ(Buff2.Data[1], std::byte{0xca}); + ASSERT_EQ(Buff2.Data[2], std::byte{0xbe}); + ASSERT_EQ(Buff2.Data[3], std::byte{0xba}); + } else { + ASSERT_EQ(Buff2.Data[0], std::byte{0xba}); + ASSERT_EQ(Buff2.Data[1], std::byte{0xbe}); + ASSERT_EQ(Buff2.Data[2], std::byte{0xca}); + ASSERT_EQ(Buff2.Data[3], std::byte{0xfe}); + } + + { + unsigned V; + auto D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + std::memcpy(&V, D.get(), sizeof(V)); + ASSERT_EQ(V, 0xBABECAFE); + + D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + std::memcpy(&V, D.get(), sizeof(V)); + ASSERT_EQ(V, 0xBABECAFE); + } +} diff --git a/clang/unittests/AST/ByteCode/CMakeLists.txt b/clang/unittests/AST/ByteCode/CMakeLists.txt index ea727cdd4412be..b862fb4834fbdc 100644 --- a/clang/unittests/AST/ByteCode/CMakeLists.txt +++ b/clang/unittests/AST/ByteCode/CMakeLists.txt @@ -1,4 +1,5 @@ add_clang_unittest(InterpTests + BitcastBuffer.cpp Descriptor.cpp toAPValue.cpp ) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits