Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/68...@github.com>
================ @@ -0,0 +1,452 @@ +//===--- InterpBitcast.cpp - Interpreter for the constexpr VM ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Boolean.h" +#include "Interp.h" +#include "PrimType.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecordLayout.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/BitVector.h" + +namespace clang { +namespace interp { + +/// Used to iterate over pointer fields. +using DataFunc = + llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t BitOffset)>; + +#define BITCAST_TYPE_SWITCH(Expr, B) \ + do { \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + TYPE_SWITCH_CASE(PT_IntAP, B) \ + TYPE_SWITCH_CASE(PT_IntAPS, B) \ + TYPE_SWITCH_CASE(PT_Bool, B) \ + default: \ + llvm_unreachable("Unhandled bitcast type"); \ + } \ + } while (0) + +/// Float is a special case that sometimes needs the floating point semantics +/// to be available. 
+#define BITCAST_TYPE_SWITCH_WITH_FLOAT(Expr, B) \ + do { \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + TYPE_SWITCH_CASE(PT_IntAP, B) \ + TYPE_SWITCH_CASE(PT_IntAPS, B) \ + TYPE_SWITCH_CASE(PT_Bool, B) \ + TYPE_SWITCH_CASE(PT_Float, B) \ + default: \ + llvm_unreachable("Unhandled bitcast type"); \ + } \ + } while (0) + +/// Rotate things around for big endian targets. +static void swapBytes(std::byte *M, size_t N) { + for (size_t I = 0; I != (N / 2); ++I) + std::swap(M[I], M[N - 1 - I]); +} + +/// Track what bits have been initialized to known values and which ones +/// have indeterminate value. +/// All offsets are in bits. +struct BitTracker { + llvm::BitVector Initialized; + llvm::BitVector Data; + + BitTracker() = default; + + size_t size() const { + assert(Initialized.size() == Data.size()); + return Initialized.size(); + } + + const std::byte *getBytes(size_t BitOffset, int a) { + assert(BitOffset % 8 == 0); + return reinterpret_cast<const std::byte *>(Data.getData().data()) + + (BitOffset / 8); + } + + bool allInitialized(size_t Offset, size_t Size) const { + return Initialized.find_first_unset_in(Offset, Offset + Size) == -1; + } + + bool allInitialized() const { return Initialized.all(); } + + void pushData(const std::byte *data, size_t BitOffset, size_t BitWidth) { + assert(BitOffset >= Data.size()); + Data.reserve(BitOffset + BitWidth); + Initialized.reserve(BitOffset + BitWidth); + + // First, fill up the bit vector until BitOffset. The bits are all 0 + // but we record them as indeterminate. 
+ { + Data.resize(BitOffset, false); + Initialized.resize(BitOffset, false); + } + + size_t BitsHandled = 0; + // Read all full bytes first + for (size_t I = 0; I != BitWidth / 8; ++I) { + for (unsigned X = 0; X != 8; ++X) { + Data.push_back((data[I] & std::byte(1 << X)) != std::byte{0}); + Initialized.push_back(true); + ++BitsHandled; + } + } + + // Rest of the bits. + assert((BitWidth - BitsHandled) < 8); + for (size_t I = 0, E = (BitWidth - BitsHandled); I != E; ++I) { + Data.push_back((data[BitWidth / 8] & std::byte(1 << I)) != std::byte{0}); + Initialized.push_back(true); + ++BitsHandled; + } + } + + void pushZeroes(size_t Amount) { + size_t N = Data.size(); + Data.resize(N + Amount, false); + Initialized.resize(N + Amount, true); + } + + void markUninitializedUntil(size_t Offset) { + assert(Offset >= Data.size()); + Data.resize(Offset, false); + Initialized.resize(Offset, false); + } +}; + +/// We use this to recursively iterate over all fields and elemends of a pointer +/// and extract relevant data for a bitcast. +static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, + DataFunc F) { + const Descriptor *FieldDesc = P.getFieldDesc(); + assert(FieldDesc); + + // Primitives. + if (FieldDesc->isPrimitive()) + return F(P, *Ctx.classify(FieldDesc->getType()), Offset); + + // Primitive arrays. + if (FieldDesc->isPrimitiveArray()) { + QualType ElemType = + FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType(); + size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); + PrimType ElemT = *Ctx.classify(ElemType); + bool Ok = true; + for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { + Ok = Ok && F(P.atIndex(I), ElemT, Offset); + Offset += ElemSizeInBits; + } + return Ok; + } + + // Composite arrays. 
+ if (FieldDesc->isCompositeArray()) { + QualType ElemType = + FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType(); + size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); + for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { + enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F); + Offset += ElemSizeInBits; + } + return true; + } + + // Records. + if (FieldDesc->isRecord()) { + const Record *R = FieldDesc->ElemRecord; + const ASTRecordLayout &Layout = + Ctx.getASTContext().getASTRecordLayout(R->getDecl()); + bool Ok = true; + for (const auto &B : R->bases()) { + Pointer Elem = P.atField(B.Offset); + CharUnits ByteOffset = + Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); + size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + + for (unsigned I = 0; I != R->getNumFields(); ++I) { + const Record::Field *Fi = R->getField(I); + Pointer Elem = P.atField(Fi->Offset); + size_t BitOffset = Offset + Layout.getFieldOffset(I); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + return Ok; + } + + llvm_unreachable("Unhandled data type"); +} + +static bool enumeratePointerFields(const Pointer &P, const Context &Ctx, + DataFunc F) { + return enumerateData(P, Ctx, 0, F); +} + +// This function is constexpr if and only if To, From, and the types of +// all subobjects of To and From are types T such that... +// (3.1) - is_union_v<T> is false; +// (3.2) - is_pointer_v<T> is false; +// (3.3) - is_member_pointer_v<T> is false; +// (3.4) - is_volatile_v<T> is false; and +// (3.5) - T has no non-static data members of reference type +// +// NOTE: This is a version of checkBitCastConstexprEligibilityType() in +// ExprConstant.cpp. 
+static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, + bool IsToType) { + enum { + E_Union = 0, + E_Pointer, + E_MemberPointer, + E_Volatile, + E_Reference, + }; + enum { C_Member, C_Base }; + + auto diag = [&](int Reason) -> bool { + const Expr *E = S.Current->getExpr(OpPC); + S.FFDiag(E, diag::note_constexpr_bit_cast_invalid_type) + << static_cast<int>(IsToType) << (Reason == E_Reference) << Reason + << E->getSourceRange(); + return false; + }; + auto note = [&](int Construct, QualType NoteType, SourceRange NoteRange) { + S.Note(NoteRange.getBegin(), diag::note_constexpr_bit_cast_invalid_subtype) + << NoteType << Construct << T << NoteRange; + return false; + }; + + T = T.getCanonicalType(); + + if (T->isUnionType()) + return diag(E_Union); + if (T->isPointerType()) + return diag(E_Pointer); + if (T->isMemberPointerType()) + return diag(E_MemberPointer); + if (T.isVolatileQualified()) + return diag(E_Volatile); + + if (const RecordDecl *RD = T->getAsRecordDecl()) { + if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &BS : CXXRD->bases()) { + if (!CheckBitcastType(S, OpPC, BS.getType(), IsToType)) + return note(C_Base, BS.getType(), BS.getBeginLoc()); + } + } + for (const FieldDecl *FD : RD->fields()) { + if (FD->getType()->isReferenceType()) + return diag(E_Reference); + if (!CheckBitcastType(S, OpPC, FD->getType(), IsToType)) + return note(C_Member, FD->getType(), FD->getSourceRange()); + } + } + + if (T->isArrayType() && + !CheckBitcastType(S, OpPC, S.getCtx().getBaseElementType(T), IsToType)) + return false; + + return true; +} + +static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, + BitTracker &Bits, bool ReturnOnUninit) { + const ASTContext &ASTCtx = Ctx.getASTContext(); + uint64_t PointerSizeInBits = + ASTCtx.getTargetInfo().getPointerWidth(LangAS::Default); + bool BigEndian = ASTCtx.getTargetInfo().isBigEndian(); + + return enumeratePointerFields( + FromPtr, Ctx, + [&](const 
Pointer &P, PrimType T, size_t BitOffset) -> bool { + if (!P.isInitialized()) { + Bits.markUninitializedUntil(BitOffset + + (primSize(T) * 8)); /// primSize() usage. + return ReturnOnUninit; + } + + assert(P.isInitialized()); + // nullptr_t is a PT_Ptr for us, but it's still not std::is_pointer_v. + if (T == PT_Ptr) { + assert(P.getType()->isNullPtrType()); + Bits.pushZeroes(PointerSizeInBits); + return true; + } + + unsigned ObjectReprBytes = + ASTCtx.getTypeSizeInChars(P.getType()).getQuantity(); + + CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); + unsigned BitWidth; + if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) + BitWidth = FD->getBitWidthValue(ASTCtx); + else + BitWidth = ASTCtx.toBits(ObjectReprChars); + BITCAST_TYPE_SWITCH_WITH_FLOAT(T, { + T Val = P.deref<T>(); + unsigned ValueReprBytes = Val.valueReprBytes(ASTCtx); + assert(ObjectReprBytes >= ValueReprBytes); + + llvm::SmallVector<std::byte> Buff(ObjectReprChars.getQuantity()); + Val.bitcastToMemory(Buff.data()); + if (BigEndian) + swapBytes(Buff.data(), BitWidth / 8); + Bits.pushData(Buff.data(), BitOffset, BitWidth);
----------------
sethp wrote:

I think this is going to mishandle cross-endian bit-fields. Consider:

```c++
struct Q { uint16_t q : 15; };
```

When the host & target endianness match, the mask for writing to `q` ought to be `0x7fff` (little endian) or `0xfffe` (big endian), both of which are contiguous runs and so neatly represented by a start + a length (as here). But, after byte-swapping, the write operation looks more like:

```
*(base + offsetof(Q, q)) = swapped & 0xff7f; // or 0xfeff
```

which means the copy is no longer a contiguous run of bits. I ended up drawing a bunch of ASCII diagrams to try and sort it out, e.g.
```
// below, `x` is an invalid bit, and `p` is a padding bit
// the addressable unit is assumed to be a byte (8 bits)
// higher memory addresses are "down"
// bytes are drawn MSBit -> LSBit (bit labels at the top)
// the byte offset is on the left, and bit offset is on the right
//
//      little-endian              big-endian
//
//                APInt(15, 0b+...-)
//
//      7      0                   7      0
//   0 [.......-] 0             0 [x+......] 0
//   1 [x+......] 8             1 [.......-] 8
//
//                APInt(10, 0b+...-)
//
//      7      0                   7      0
//   0 [.......-] 0             0 [xxxxxx+.] 0
//   1 [xxxxxx+.] 8             1 [.......-] 8
//
//                struct { uint16_t f : 15 }
//
//      7      0                   7      0
//   0 [......-p] 0             0 [p+......] 0
//   1 [xxxxxx+.] 8             1 [.-xxxxxx] 8
```

I went through a number of iterations on #74775, but I settled on the general operation being:

https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/lib/AST/ExprConstant.cpp#L7042-L7062

That is a bit pricey for what I imagined the common cases to be (full-word copies, host/target have matching little-endianness, and so on), so I ended up with a [`pushData`-shaped method](https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/lib/AST/ExprConstant.cpp#L7068-L7069) too, which is basically a big tower of asserts on top of a `memcpy` (for the data) and `memset` (for the validity bits).

https://github.com/llvm/llvm-project/pull/68288
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits