https://github.com/ilya-biryukov updated 
https://github.com/llvm/llvm-project/pull/131299

>From 1acb9b0717c0f55e59abca104bbb710375a67610 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryu...@google.com>
Date: Fri, 14 Mar 2025 10:53:09 +0100
Subject: [PATCH 1/3] [Format] Do not crash on non-null terminated strings

The `format` API receives a StringRef, but crashes whenever it is
non-null-terminated with the corresponding assertion:

```
FormatTests: llvm/lib/Support/MemoryBuffer.cpp:53:
void llvm::MemoryBuffer::init(const char *, const char *, bool):
Assertion `(!RequiresNullTerminator || BufEnd[0] == 0) && "Buffer is not null 
terminated!"' failed.
```

Ensure this does not happen by storing a copy of the inputs in
`std::string` that does have a null terminator.

This changes requires an extra copy of the `Content` in the
`SourceManagerForFile` APIs, but the costs of that copy should be
negligible in practice as the API is designed for convenience rather
than performance in the first place. E.g. running clang-format over
`Content` is much more expensive than the copy of the Content itself.

This copy could be avoided in most cases if we provide a constructor
that accepts `std::string` or null-terminated strings directly, but it
does not seem worth the effort.

An alternative fix would be to teach `SourceManager` to work with
non-null-terminated buffers, but given how much it is used, this would
be very complicated and is likely to incur some performance cost.
---
 clang/include/clang/Basic/SourceManager.h |  1 +
 clang/lib/Basic/SourceManager.cpp         | 10 ++++++++--
 clang/unittests/Format/FormatTest.cpp     | 11 +++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index e0f1ea435d54e..ec5803ff46290 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -2031,6 +2031,7 @@ class SourceManagerForFile {
   // The order of these fields are important - they should be in the same order
   // as they are created in `createSourceManagerForFile` so that they can be
   // deleted in the reverse order as they are created.
+  std::string ContentBuffer;
   std::unique_ptr<FileManager> FileMgr;
   std::unique_ptr<DiagnosticsEngine> Diagnostics;
   std::unique_ptr<SourceManager> SourceMgr;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index b1f2180c1d462..4e351ec9089a9 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2382,14 +2382,20 @@ size_t SourceManager::getDataStructureSizes() const {
 
 SourceManagerForFile::SourceManagerForFile(StringRef FileName,
                                            StringRef Content) {
+  // We copy to `std::string` for Context instead of StringRef because the
+  // SourceManager::getBufferData() works only with null-terminated buffers.
+  // And we still want to keep the API convenient.
+  ContentBuffer = Content.str();
+
   // This is referenced by `FileMgr` and will be released by `FileMgr` when it
   // is deleted.
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
       new llvm::vfs::InMemoryFileSystem);
+
   InMemoryFileSystem->addFile(
       FileName, 0,
-      llvm::MemoryBuffer::getMemBuffer(Content, FileName,
-                                       /*RequiresNullTerminator=*/false));
+      llvm::MemoryBuffer::getMemBuffer(ContentBuffer, FileName,
+                                       /*RequiresNullTerminator=*/true));
   // This is passed to `SM` as reference, so the pointer has to be referenced
   // in `Environment` so that `FileMgr` can out-live this function scope.
   FileMgr =
diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 9864e7ec1b2ec..54d0b13ab35c0 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -29096,6 +29096,17 @@ TEST_F(FormatTest, BreakBeforeClassName) {
                "    ArenaSafeUniquePtr {};");
 }
 
+TEST_F(FormatTest, DoesNotCrashOnNonNullTerminatedStringRefs) {
+  llvm::StringRef TwoLines = "namespace foo {}\n"
+                             "namespace bar {}";
+  llvm::StringRef FirstLine =
+      TwoLines.take_until([](char c) { return c == '\n'; });
+
+  // The internal API used to crash when passed a non-null-terminated 
StringRef.
+  // Check this does not happen anymore.
+  verifyFormat(FirstLine);
+}
+
 } // namespace
 } // namespace test
 } // namespace format

>From 9790cff796aee8f8cd40751a6be8bd7ded3c60a4 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryu...@google.com>
Date: Tue, 18 Mar 2025 19:01:56 +0100
Subject: [PATCH 2/3] fixup! [Format] Do not crash on non-null terminated
 strings

Use `verifyNoCrash`
---
 clang/unittests/Format/FormatTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 54d0b13ab35c0..1b8aacc79816c 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -29104,7 +29104,7 @@ TEST_F(FormatTest, 
DoesNotCrashOnNonNullTerminatedStringRefs) {
 
   // The internal API used to crash when passed a non-null-terminated 
StringRef.
   // Check this does not happen anymore.
-  verifyFormat(FirstLine);
+  verifyNoCrash(FirstLine);
 }
 
 } // namespace

>From ab056ca2ff3abd1577ebd64c903bf7061199b26c Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryu...@google.com>
Date: Mon, 24 Mar 2025 12:18:34 +0100
Subject: [PATCH 3/3] remove redundant llvm:: prefix

---
 clang/unittests/Format/FormatTest.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 1b8aacc79816c..af9107d0e5bf9 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -29097,10 +29097,9 @@ TEST_F(FormatTest, BreakBeforeClassName) {
 }
 
 TEST_F(FormatTest, DoesNotCrashOnNonNullTerminatedStringRefs) {
-  llvm::StringRef TwoLines = "namespace foo {}\n"
-                             "namespace bar {}";
-  llvm::StringRef FirstLine =
-      TwoLines.take_until([](char c) { return c == '\n'; });
+  StringRef TwoLines = "namespace foo {}\n"
+                       "namespace bar {}";
+  StringRef FirstLine = TwoLines.take_until([](char c) { return c == '\n'; });
 
   // The internal API used to crash when passed a non-null-terminated 
StringRef.
   // Check this does not happen anymore.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to