https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/142555

>From a98453657b881d8cacebac99551becf85168649f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com>
Date: Tue, 3 Jun 2025 09:24:57 +0200
Subject: [PATCH] [clang][bytecode] Partially address string literal uniqueness

This still leaves the case of the

constexpr auto b3 = name1() == name1();

test from cxx20.cpp broken.
---
 clang/lib/AST/ByteCode/Interp.cpp  | 45 ++++++++++++++++++++++++++++++
 clang/lib/AST/ByteCode/Interp.h    | 15 ++++++++++
 clang/lib/AST/ByteCode/Pointer.cpp | 11 ++++++++
 clang/lib/AST/ByteCode/Pointer.h   |  1 +
 clang/test/AST/ByteCode/cxx11.cpp  | 28 +++++++++++++++++++
 clang/test/AST/ByteCode/cxx20.cpp  |  4 +--
 6 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index e454d9e3bc218..19c95b12b9198 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1979,6 +1979,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) {
   return false;
 }
 
+bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
+                                             const Pointer &RHS) {
+  unsigned LHSOffset = LHS.getIndex();
+  unsigned RHSOffset = RHS.getIndex();
+  unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize();
+  unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize();
+
+  StringRef LHSStr((const char *)LHS.atIndex(0).getRawAddress(), LHSLength);
+  StringRef RHSStr((const char *)RHS.atIndex(0).getRawAddress(), RHSLength);
+  int32_t IndexDiff = RHSOffset - LHSOffset;
+  if (IndexDiff < 0) {
+    if (static_cast<int32_t>(LHSLength) < -IndexDiff)
+      return false;
+    LHSStr = LHSStr.drop_front(-IndexDiff);
+  } else {
+    if (static_cast<int32_t>(RHSLength) < IndexDiff)
+      return false;
+    RHSStr = RHSStr.drop_front(IndexDiff);
+  }
+
+  unsigned ShorterCharWidth;
+  StringRef Shorter;
+  StringRef Longer;
+  if (LHSLength < RHSLength) {
+    ShorterCharWidth = LHS.elemSize();
+    Shorter = LHSStr;
+    Longer = RHSStr;
+  } else {
+    ShorterCharWidth = RHS.elemSize();
+    Shorter = RHSStr;
+    Longer = LHSStr;
+  }
+
+  // The null terminator isn't included in the string data, so check for it
+  // manually. If the longer string doesn't have a null terminator where the
+  // shorter string ends, they aren't potentially overlapping.
+  for (unsigned NullByte : llvm::seq(ShorterCharWidth)) {
+    if (Shorter.size() + NullByte >= Longer.size())
+      break;
+    if (Longer[Shorter.size() + NullByte])
+      return false;
+  }
+  return Shorter == Longer.take_front(Shorter.size());
+}
+
 // https://github.com/llvm/llvm-project/issues/102513
 #if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
 #pragma optimize("", off)
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 5473733578d7e..1af3bdb42a3b9 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -1034,6 +1034,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) {
           Builtin == Builtin::BI__builtin_function_start);
 }
 
+bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
+                                             const Pointer &RHS);
+
 template <>
 inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
   using BoolT = PrimConv<PT_Bool>::T;
@@ -1068,6 +1071,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
     return true;
   }
 
+  // FIXME: The source check here isn't entirely correct.
+  if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() &&
+      LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) {
+    if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) {
+      const SourceInfo &Loc = S.Current->getSource(OpPC);
+      S.FFDiag(Loc, diag::note_constexpr_literal_comparison)
+          << LHS.toDiagnosticString(S.getASTContext())
+          << RHS.toDiagnosticString(S.getASTContext());
+      return false;
+    }
+  }
+
   if (Pointer::hasSameBase(LHS, RHS)) {
     if (LHS.inUnion() && RHS.inUnion()) {
       // If the pointers point into a union, things are a little more
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 6c2566ba20bde..50453c72c582b 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const {
   return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E);
 }
 
+bool Pointer::pointsToStringLiteral() const {
+  if (isZero() || !isBlockPointer())
+    return false;
+
+  if (block()->isDynamic())
+    return false;
+
+  const Expr *E = block()->getDescriptor()->asExpr();
+  return E && isa<StringLiteral>(E);
+}
+
 std::optional<std::pair<Pointer, Pointer>>
 Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) {
   if (!A.isBlockPointer() || !B.isBlockPointer())
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 479da09004685..0234ab02ab8f6 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -756,6 +756,7 @@ class Pointer {
   /// Whether this points to a block that's been created for a "literal lvalue",
   /// i.e. a non-MaterializeTemporaryExpr Expr.
   bool pointsToLiteral() const;
+  bool pointsToStringLiteral() const;
 
   /// Prints the pointer.
   void print(llvm::raw_ostream &OS) const;
diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp
index 44725f13e6a58..b34e7823220e2 100644
--- a/clang/test/AST/ByteCode/cxx11.cpp
+++ b/clang/test/AST/ByteCode/cxx11.cpp
@@ -260,3 +260,31 @@ namespace ZeroSizeCmp {
   static_assert(&start != &end, ""); // both-error {{constant expression}} \
                                      // both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}}
 }
+
+namespace OverlappingStrings {
+  static_assert(+"foo" != +"bar", "");
+  static_assert(&"xfoo"[1] != &"yfoo"[1], "");
+  static_assert(+"foot" != +"foo", "");
+  static_assert(+"foo\0bar" != +"foo\0baz", "");
+
+
+#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
+  static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
+  static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
+  static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), "");
+
+  constexpr const char *string = "hello";
+  constexpr const char *also_string = string;
+  static_assert(string == string, "");
+  static_assert(string == also_string, "");
+
+
+  // These strings may overlap, and so the result of the comparison is unknown.
+  constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+  constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+  constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+  constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+
+
+
+}
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 0b2234ef83298..e0fb38e106102 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -122,8 +122,8 @@ static_assert(!b4);
 constexpr auto bar(const char *p) { return p + __builtin_strlen(p); }
 constexpr auto b5 = bar(p1) == p1;
 static_assert(!b5);
-constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \
-                                   // ref-note {{comparison of addresses of potentially overlapping literals}}
+constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \
+                                   // both-note {{comparison of addresses of potentially overlapping literals}}
 constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \
                                        // both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}}
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to