https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/92031
A pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a warning-opt-out region. The begin and end locations (opt-out regions) are stored in preprocessor instances (PP) and will be queried by the `-Wunsafe-buffer-usage` analyzer. The commit adds serialization and de-serialization implementations for the stored regions. Basically, the serialized representation of the regions of a PP is a (ordered) sequence of source location encodings. During serialization, it only serializes regions of the current translation unit. Regions from loaded files are not serialized. During de-serialization, regions from loaded files are stored by their source files. When later one queries if a loaded location `l` is in an opt-out region, PP looks up regions by the source file of `l`. The reported issue at upstream: https://github.com/llvm/llvm-project/issues/90501 rdar://124035402 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 <ziqing_...@apple.com> Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h | 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp | 106 ++++++++++++++---- clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h | 20 ++++ .../Modules/Inputs/SafeBuffers/test_sub2.h | 11 ++ clang/test/Modules/safe_buffers_optout.cpp | 39 +++++++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 ++++++++++++ .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 +++++ 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector<std::pair<SourceLocation, SourceLocation>, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap<FileID, SafeBufferOptOutMapTy> LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl<SourceLocation> &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, - &Loc](const std::pair<SourceLocation, SourceLocation> &Region) { - return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc); - }); + const SourceLocation &Loc) const { + // The lambda that tests if a `Loc` is in an opt-out region given one opt-out + // region map: + auto TestInMap = [&SourceMgr](const SafeBufferOptOutMapTy &Map, + const SourceLocation &Loc) -> bool { + // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: + auto FirstRegionEndingAfterLoc = llvm::partition_point( + Map, [&SourceMgr, + &Loc](const std::pair<SourceLocation, SourceLocation> &Region) { + return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc); + }); + + if (FirstRegionEndingAfterLoc != Map.end()) { + // To test if the start location of the found region precedes `Loc`: + return SourceMgr.isBeforeInTranslationUnit( + FirstRegionEndingAfterLoc->first, Loc); + } + // If we do not find a region whose end location passes `Loc`, we want to + // check if the current region is still open: + if (!Map.empty() && Map.back().first == Map.back().second) + return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc); + return false; + }; - if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) { - // To test if the start location of the found region precedes `Loc`: - return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first, - Loc); - } - // If we do not find a region whose end location passes `Loc`, we want to - // check if the current region is still open: - if (!SafeBufferOptOutMap.empty() && - SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second) - return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first, - Loc); + if (SourceMgr.isLocalSourceLocation(Loc)) + return TestInMap(SafeBufferOptOutMap, Loc); + + // Expand macros (if any) until it gets to a file location: + SourceLocation FLoc = SourceMgr.getFileLoc(Loc); + FileID FlocFID = SourceMgr.getDecomposedLoc(FLoc).first; + auto LoadedMap = LoadedSafeBufferOptOutMap.find(FlocFID); + + if (LoadedMap != LoadedSafeBufferOptOutMap.end()) + return TestInMap(LoadedMap->getSecond(), Loc); + // FIXME: think out if this is unreachable? return false; } @@ -1551,6 +1567,58 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) { return InSafeBufferOptOutRegion; } +SmallVector<SourceLocation, 64> +Preprocessor::serializeSafeBufferOptOutMap() const { + assert(!InSafeBufferOptOutRegion && + "Attempt to serialize safe buffer opt-out regions before file being " + "completely preprocessed"); + + SmallVector<SourceLocation, 64> SrcSeq; + + for (const auto &[begin, end] : SafeBufferOptOutMap) { + SrcSeq.push_back(begin); + SrcSeq.push_back(end); + } + // Only `SafeBufferOptOutMap` gets serialized. No need to serialize + // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every + // pch/module in the pch-chain/module-DAG will be loaded one by one in order. + // It means that for each loading pch/module m, it just needs to load m's own + // `SafeBufferOptOutMap`. + return SrcSeq; +} + +void Preprocessor::setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl<SourceLocation> &SourceLocations) { + auto It = SourceLocations.begin(); + + assert(SourceLocations.size() % 2 == 0 && + "ill-formed SourceLocation sequence"); + while (It != SourceLocations.end()) { + SourceLocation begin = *It++; + SourceLocation end = *It++; + SourceLocation FileLoc = SourceMgr.getFileLoc(begin); + FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first; + + if (FID.isInvalid()) { + // I suppose this should not happen: + assert(false && "Attempted to read a safe buffer opt-out region whose " + "begin location is associated to an invalid File ID."); + break; + } + assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file"); + // Here we assume that + // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`. + // Though it may not hold in very rare and strange cases, i.e., a pair of + // opt-out pragams written in different files but are always used in a way + // that they match in a TU (otherwise PP will complaint). + // FIXME: PP should complaint about cross file safe buffer opt-out pragmas. + assert(FID == SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first && + "Maybe a safe buffer opt-out region starts and ends at different " + "files."); + LoadedSafeBufferOptOutMap[FID].emplace_back(begin, end); + } +} + ModuleLoader::~ModuleLoader() = default; CommentHandler::~CommentHandler() = default; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 43b69045bb054..4e22b3e1187c0 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3586,6 +3586,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case PP_UNSAFE_BUFFER_USAGE: { + if (!Record.empty()) { + SmallVector<SourceLocation, 64> SrcLocs; + unsigned Idx = 0; + while (Idx < Record.size()) + SrcLocs.push_back(ReadSourceLocation(F, Record, Idx)); + PP.setDeserializedSafeBufferOptOutMap(SrcLocs); + } + break; + } + case PP_CONDITIONAL_STACK: if (!Record.empty()) { unsigned Idx = 0, End = Record.size() - 1; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 30195868ca996..3203173640d18 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -911,6 +911,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PP_CONDITIONAL_STACK); RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS); RECORD(PP_ASSUME_NONNULL_LOC); + RECORD(PP_UNSAFE_BUFFER_USAGE); // SourceManager Block. BLOCK(SOURCE_MANAGER_BLOCK); @@ -2439,6 +2440,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Record.clear(); } + // Write the safe buffer opt-out region map in PP + for (SourceLocation &S : PP.serializeSafeBufferOptOutMap()) + AddSourceLocation(std::move(S), Record); + Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record); + Record.clear(); + // Enter the preprocessor block. Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3); diff --git a/clang/test/Modules/Inputs/SafeBuffers/base.h b/clang/test/Modules/Inputs/SafeBuffers/base.h new file mode 100644 index 0000000000000..660df17998a48 --- /dev/null +++ b/clang/test/Modules/Inputs/SafeBuffers/base.h @@ -0,0 +1,9 @@ +#ifdef __cplusplus +int base(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} +#endif diff --git a/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap new file mode 100644 index 0000000000000..a4826c6514b98 --- /dev/null +++ b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap @@ -0,0 +1,10 @@ +module safe_buffers_test_base { + header "base.h" +} + +module safe_buffers_test_optout { + explicit module test_sub1 { header "test_sub1.h" } + explicit module test_sub2 { textual header "test_sub2.h" } + use safe_buffers_test_base +} + diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h new file mode 100644 index 0000000000000..c2b105c6e748a --- /dev/null +++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h @@ -0,0 +1,20 @@ +#include "base.h" + +#ifdef __cplusplus +int sub1(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} + +template <typename T> +T sub1_T(T *p) { + T x = p[5]; +#pragma clang unsafe_buffer_usage begin + T y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} +#endif diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h new file mode 100644 index 0000000000000..11eb9591ab600 --- /dev/null +++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h @@ -0,0 +1,11 @@ +#include "base.h" + +#ifdef __cplusplus +int sub2(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} +#endif diff --git a/clang/test/Modules/safe_buffers_optout.cpp b/clang/test/Modules/safe_buffers_optout.cpp new file mode 100644 index 0000000000000..dd8b1cca5ec89 --- /dev/null +++ b/clang/test/Modules/safe_buffers_optout.cpp @@ -0,0 +1,39 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_base -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_optout -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_optout.pcm -fmodule-file=%t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\ +// RUN: -std=c++20 -Wunsafe-buffer-usage + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_base -x c++\ +// RUN: %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_optout -x c++\ +// RUN: %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs/SafeBuffers %s -x c++\ +// RUN: -std=c++20 -Wunsafe-buffer-usage + +#include "test_sub1.h" +#include "test_sub2.h" + +// Testing safe buffers opt-out region serialization with modules: this +// file loads 2 submodules from top-level module +// `safe_buffers_test_optout`, which uses another top-level module +// `safe_buffers_test_base`. (So the module dependencies form a DAG.) + +// expected-warning@base.h:3{{unsafe buffer access}} +// expected-note@base.h:3{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:5{{unsafe buffer access}} +// expected-note@test_sub1.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:14{{unsafe buffer access}} +// expected-note@test_sub1.h:14{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub2.h:5{{unsafe buffer access}} +// expected-note@test_sub2.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int foo(int * p) { + int x = p[5]; // expected-warning{{unsafe buffer access}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + sub1_T(p); // instantiate template + return base(p) + sub1(p) + sub2(p); +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp new file mode 100644 index 0000000000000..04ac77b3f342e --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp @@ -0,0 +1,72 @@ +// A more complex example than warn-unsafe-buffer-usage-pragma-pch.cpp: +// MAIN - includes INC_H_1 +// \ loads PCH_H_1 - includes INC_H_2 +// \ loads PCH_H_2 + +// Test with PCH +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t-1 -DPCH_H_2 %s +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t-1 -emit-pch -o %t-2 -DPCH_H_1 %s +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t-2 -DMAIN -verify %s + +#define UNSAFE_BEGIN _Pragma("clang unsafe_buffer_usage begin") +#define UNSAFE_END _Pragma("clang unsafe_buffer_usage end") + + +#ifdef INC_H_1 +#undef INC_H_1 +int a(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} +#endif + +#ifdef INC_H_2 +#undef INC_H_2 +int b(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} +#endif + +#ifdef PCH_H_1 +#undef PCH_H_1 +#define INC_H_2 +#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp" + +int c(int *s) { + s[2]; // <- expected warning here +UNSAFE_BEGIN + return s[1]; +UNSAFE_END +} +#endif + +#ifdef PCH_H_2 +#undef PCH_H_2 +int d(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} +#endif + +#ifdef MAIN +#undef MAIN +#define INC_H_1 +#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp" + +// expected-warning@-45{{unsafe buffer access}} expected-note@-45{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@-36{{unsafe buffer access}} expected-note@-36{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@-24{{unsafe buffer access}} expected-note@-24{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@-15{{unsafe buffer access}} expected-note@-15{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int main() { + int s[] = {1, 2, 3}; + return a(s) + b(s) + c(s) + d(s); +} +#undef MAIN +#endif diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp new file mode 100644 index 0000000000000..abd3f0ffe9565 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp @@ -0,0 +1,27 @@ +// The original example from https://github.com/llvm/llvm-project/issues/90501 + +// Test without PCH +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include %s -verify %s +// Test with PCH +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t -verify %s + +#ifndef A_H +#define A_H + +int a(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + +#else +// expected-warning@-7{{unsafe buffer access}} +// expected-note@-8{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int main() { + int s[] = {1, 2, 3}; + return a(s); +} + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits