https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/142555
>From a98453657b881d8cacebac99551becf85168649f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 3 Jun 2025 09:24:57 +0200 Subject: [PATCH] [clang][bytecode] Partially address string literal uniqueness This still leaves the case of the constexpr auto b3 = name1() == name1(); test from cxx20.cpp broken. --- clang/lib/AST/ByteCode/Interp.cpp | 45 ++++++++++++++++++++++++++++++ clang/lib/AST/ByteCode/Interp.h | 15 ++++++++++ clang/lib/AST/ByteCode/Pointer.cpp | 11 ++++++++ clang/lib/AST/ByteCode/Pointer.h | 1 + clang/test/AST/ByteCode/cxx11.cpp | 28 +++++++++++++++++++ clang/test/AST/ByteCode/cxx20.cpp | 4 +-- 6 files changed, 102 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index e454d9e3bc218..19c95b12b9198 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1979,6 +1979,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) { return false; } +bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS, + const Pointer &RHS) { + unsigned LHSOffset = LHS.getIndex(); + unsigned RHSOffset = RHS.getIndex(); + unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize(); + unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize(); + + StringRef LHSStr((const char *)LHS.atIndex(0).getRawAddress(), LHSLength); + StringRef RHSStr((const char *)RHS.atIndex(0).getRawAddress(), RHSLength); + int32_t IndexDiff = RHSOffset - LHSOffset; + if (IndexDiff < 0) { + if (static_cast<int32_t>(LHSLength) < -IndexDiff) + return false; + LHSStr = LHSStr.drop_front(-IndexDiff); + } else { + if (static_cast<int32_t>(RHSLength) < IndexDiff) + return false; + RHSStr = RHSStr.drop_front(IndexDiff); + } + + unsigned ShorterCharWidth; + StringRef Shorter; + StringRef Longer; + if (LHSLength < RHSLength) { + ShorterCharWidth = LHS.elemSize(); + Shorter = LHSStr; + Longer = RHSStr; + } else { + ShorterCharWidth = RHS.elemSize(); + Shorter = RHSStr; + Longer = LHSStr; + } + + // The null terminator isn't included in the string data, so check for it + // manually. If the longer string doesn't have a null terminator where the + // shorter string ends, they aren't potentially overlapping. + for (unsigned NullByte : llvm::seq(ShorterCharWidth)) { + if (Shorter.size() + NullByte >= Longer.size()) + break; + if (Longer[Shorter.size() + NullByte]) + return false; + } + return Shorter == Longer.take_front(Shorter.size()); +} + // https://github.com/llvm/llvm-project/issues/102513 #if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG) #pragma optimize("", off) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 5473733578d7e..1af3bdb42a3b9 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1034,6 +1034,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) { Builtin == Builtin::BI__builtin_function_start); } +bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS, + const Pointer &RHS); + template <> inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) { using BoolT = PrimConv<PT_Bool>::T; @@ -1068,6 +1071,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) { return true; } + // FIXME: The source check here isn't entirely correct. + if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() && + LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) { + if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_literal_comparison) + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); + return false; + } + } + if (Pointer::hasSameBase(LHS, RHS)) { if (LHS.inUnion() && RHS.inUnion()) { // If the pointers point into a union, things are a little more diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp index 6c2566ba20bde..50453c72c582b 100644 --- a/clang/lib/AST/ByteCode/Pointer.cpp +++ b/clang/lib/AST/ByteCode/Pointer.cpp @@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const { return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E); } +bool Pointer::pointsToStringLiteral() const { + if (isZero() || !isBlockPointer()) + return false; + + if (block()->isDynamic()) + return false; + + const Expr *E = block()->getDescriptor()->asExpr(); + return E && isa<StringLiteral>(E); +} + std::optional<std::pair<Pointer, Pointer>> Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) { if (!A.isBlockPointer() || !B.isBlockPointer()) diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index 479da09004685..0234ab02ab8f6 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -756,6 +756,7 @@ class Pointer { /// Whether this points to a block that's been created for a "literal lvalue", /// i.e. a non-MaterializeTemporaryExpr Expr. bool pointsToLiteral() const; + bool pointsToStringLiteral() const; /// Prints the pointer. void print(llvm::raw_ostream &OS) const; diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp index 44725f13e6a58..b34e7823220e2 100644 --- a/clang/test/AST/ByteCode/cxx11.cpp +++ b/clang/test/AST/ByteCode/cxx11.cpp @@ -260,3 +260,31 @@ namespace ZeroSizeCmp { static_assert(&start != &end, ""); // both-error {{constant expression}} \ // both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}} } + +namespace OverlappingStrings { + static_assert(+"foo" != +"bar", ""); + static_assert(&"xfoo"[1] != &"yfoo"[1], ""); + static_assert(+"foot" != +"foo", ""); + static_assert(+"foo\0bar" != +"foo\0baz", ""); + + +#define fold(x) (__builtin_constant_p(x) ? (x) : (x)) + static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), ""); + static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), ""); + static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), ""); + + constexpr const char *string = "hello"; + constexpr const char *also_string = string; + static_assert(string == string, ""); + static_assert(string == also_string, ""); + + + // These strings may overlap, and so the result of the comparison is unknown. + constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}} + constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}} + constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}} + constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}} + + + +} diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 0b2234ef83298..e0fb38e106102 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -122,8 +122,8 @@ static_assert(!b4); constexpr auto bar(const char *p) { return p + __builtin_strlen(p); } constexpr auto b5 = bar(p1) == p1; static_assert(!b5); -constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{comparison of addresses of potentially overlapping literals}} +constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \ + // both-note {{comparison of addresses of potentially overlapping literals}} constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \ // both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits