akyrtzi created this revision. Herald added a subscriber: mgorny. Herald added a project: All. akyrtzi requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Directive `dependency_directives_scan::tokens_present_before_eof` is introduced to indicate there were tokens present before the last scanned dependency directive and EOF. This is useful to ensure we correctly identify the macro guards when lexing using the dependency directives. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D133357 Files: clang/include/clang/Lex/DependencyDirectivesScanner.h clang/lib/Lex/DependencyDirectivesScanner.cpp clang/lib/Lex/Lexer.cpp clang/unittests/Lex/CMakeLists.txt clang/unittests/Lex/DependencyDirectivesScannerTest.cpp clang/unittests/Lex/PPDependencyDirectivesTest.cpp
Index: clang/unittests/Lex/PPDependencyDirectivesTest.cpp =================================================================== --- /dev/null +++ clang/unittests/Lex/PPDependencyDirectivesTest.cpp @@ -0,0 +1,150 @@ +//===- unittests/Lex/PPDependencyDirectivesTest.cpp -------------------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "clang/Lex/DependencyDirectivesScanner.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderSearchOptions.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace clang; + +namespace { + +// The test fixture. +class PPDependencyDirectivesTest : public ::testing::Test { +protected: + PPDependencyDirectivesTest() + : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), + Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { + TargetOpts->Triple = "x86_64-apple-macos12"; + Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr<DiagnosticIDs> DiagID; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + LangOptions LangOpts; + std::shared_ptr<TargetOptions> TargetOpts; + IntrusiveRefCntPtr<TargetInfo> Target; +}; + +class IncludeCollector : public PPCallbacks { +public: + Preprocessor &PP; + SmallVectorImpl<StringRef> &IncludedFiles; + + IncludeCollector(Preprocessor &PP, SmallVectorImpl<StringRef> &IncludedFiles) + : PP(PP), IncludedFiles(IncludedFiles) {} + + void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, FileID PrevFID, + SourceLocation Loc) override { + if (Reason != LexedFileChangeReason::EnterFile) + return; + if (FID == PP.getPredefinesFileID()) + return; + StringRef Filename = + PP.getSourceManager().getSLocEntry(FID).getFile().getName(); + IncludedFiles.push_back(Filename); + } +}; + +TEST_F(PPDependencyDirectivesTest, MacroGuard) { + // "head1.h" has a macro guard and should only be included once. + // "head2.h" and "head3.h" have tokens following the macro check, they should + // be included multiple times. + + auto VFS = new llvm::vfs::InMemoryFileSystem(); + VFS->addFile( + "head1.h", 0, + llvm::MemoryBuffer::getMemBuffer("#ifndef H1_H\n#define H1_H\n#endif\n")); + VFS->addFile( + "head2.h", 0, + llvm::MemoryBuffer::getMemBuffer("#ifndef H2_H\n#define H2_H\n#endif\n\n" + "extern int foo;\n")); + VFS->addFile("head3.h", 0, + llvm::MemoryBuffer::getMemBuffer( + "#ifndef H3_H\n#define H3_H\n#endif\n\n" + "#ifdef SOMEMAC\nextern int foo;\n#endif\n")); + VFS->addFile("main.c", 0, + llvm::MemoryBuffer::getMemBuffer( + "#include \"head1.h\"\n#include \"head1.h\"\n" + "#include \"head2.h\"\n#include \"head2.h\"\n" + "#include \"head3.h\"\n#include \"head3.h\"\n")); + FileMgr.setVirtualFileSystem(VFS); + + Optional<FileEntryRef> FE; + ASSERT_THAT_ERROR(FileMgr.getFileRef("main.c").moveInto(FE), + llvm::Succeeded()); + SourceMgr.setMainFileID( + SourceMgr.createFileID(*FE, SourceLocation(), SrcMgr::C_User)); + + struct DepDirectives { + SmallVector<dependency_directives_scan::Token> Tokens; + SmallVector<dependency_directives_scan::Directive> Directives; + }; + SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects; + + auto getDependencyDirectives = [&](FileEntryRef File) + -> Optional<ArrayRef<dependency_directives_scan::Directive>> { + DepDirectivesObjects.push_back(std::make_unique<DepDirectives>()); + StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer(); + SmallVector<dependency_directives_scan::Token> Tokens; + SmallVector<dependency_directives_scan::Directive> Directives; + bool Err = scanSourceForDependencyDirectives( + Input, DepDirectivesObjects.back()->Tokens, + DepDirectivesObjects.back()->Directives); + EXPECT_FALSE(Err); + return llvm::makeArrayRef(DepDirectivesObjects.back()->Directives); + }; + + auto PPOpts = std::make_shared<PreprocessorOptions>(); + PPOpts->DependencyDirectivesForFile = [&](FileEntryRef File) + -> Optional<ArrayRef<dependency_directives_scan::Directive>> { + return getDependencyDirectives(File); + }; + + TrivialModuleLoader ModLoader; + HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, + Diags, LangOpts, Target.get()); + Preprocessor PP(PPOpts, Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader, + /*IILookup =*/nullptr, + /*OwnsHeaderSearch =*/false); + PP.Initialize(*Target); + + SmallVector<StringRef> IncludedFiles; + PP.addPPCallbacks(std::make_unique<IncludeCollector>(PP, IncludedFiles)); + PP.EnterMainSourceFile(); + while (true) { + Token tok; + PP.Lex(tok); + if (tok.is(tok::eof)) + break; + } + + SmallVector<StringRef> ExpectedIncludes{ + "main.c", "./head1.h", "./head2.h", "./head2.h", "./head3.h", "./head3.h", + }; + EXPECT_EQ(IncludedFiles, ExpectedIncludes); +} + +} // anonymous namespace Index: clang/unittests/Lex/DependencyDirectivesScannerTest.cpp =================================================================== --- clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -25,7 +25,8 @@ return true; raw_svector_ostream OS(Out); - printDependencyDirectivesAsSource(Input, Directives, OS); + printDependencyDirectivesAsSource(Input, Directives, OS, + /*PrintMarkerForTokensBeforeEOF*/ true); if (!Out.empty() && Out.back() != '\n') Out.push_back('\n'); Out.push_back('\0'); @@ -57,10 +58,11 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens, Directives)); - EXPECT_TRUE(Out.empty()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); EXPECT_TRUE(Tokens.empty()); - ASSERT_EQ(1u, Directives.size()); - ASSERT_EQ(pp_eof, Directives.back().Kind); + ASSERT_EQ(2u, Directives.size()); + EXPECT_EQ(tokens_present_before_eof, Directives[0].Kind); + EXPECT_EQ(pp_eof, Directives[1].Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { @@ -451,7 +453,7 @@ SmallVector<char, 128> Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#pragma push_macro(\"MACRO\")\n", Out)); @@ -470,15 +472,15 @@ EXPECT_STREQ("#pragma include_alias(<A>, <B>)\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#pragma clang module\n", Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#pragma clang module impor\n", Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#pragma clang module import\n", Out)); @@ -663,7 +665,7 @@ "#error \\\n#include <t.h>\n", }) { ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("<TokBEOF>\n", Out.data()); } for (auto Source : { @@ -767,7 +769,8 @@ )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); EXPECT_STREQ( - "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n", + "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n" + "<TokBEOF>\n", Out.data()); Source = R"(#if NEVER_ENABLED @@ -778,7 +781,8 @@ )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); EXPECT_STREQ( - "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n", + "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n" + "<TokBEOF>\n", Out.data()); } @@ -799,11 +803,11 @@ StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data()); + EXPECT_STREQ("#define X '\\ \t\nx'\n<TokBEOF>\n", Out.data()); Source = "#define X \"\\ \r\nx\"\nvoid foo() {}"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data()); + EXPECT_STREQ("#define X \"\\ \r\nx\"\n<TokBEOF>\n", Out.data()); Source = "#define X \"\\ \r\nx\n#include <x>\n"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); @@ -848,11 +852,56 @@ "exp\\\nort import:l[[rename]];" "import<<=3;import a b d e d e f e;" "import foo[[no_unique_address]];import foo();" - "import f(:sefse);import f(->a=3);\n", + "import f(:sefse);import f(->a=3);" + "<TokBEOF>\n", Out.data()); - ASSERT_EQ(Directives.size(), 10u); + ASSERT_EQ(Directives.size(), 11u); EXPECT_EQ(Directives[0].Kind, pp_include); EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); } +TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) { + SmallString<128> Out; + + StringRef Source = R"( + #define A + #ifdef B + int x; + #endif + )"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#define A\n<TokBEOF>\n", Out.data()); + + Source = R"( + #ifndef A + #define A + #endif // some comment + + // other comment + )"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data()); + + Source = R"( + #ifndef A + #define A + #endif /* some comment + + */ + )"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data()); + + Source = R"( + #ifndef A + #define A + #endif /* some comment + + */ + int x; + )"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBEOF>\n", Out.data()); +} + } // end anonymous namespace Index: clang/unittests/Lex/CMakeLists.txt =================================================================== --- clang/unittests/Lex/CMakeLists.txt +++ clang/unittests/Lex/CMakeLists.txt @@ -9,6 +9,7 @@ LexerTest.cpp PPCallbacksTest.cpp PPConditionalDirectiveRecordTest.cpp + PPDependencyDirectivesTest.cpp PPMemoryAllocationsTest.cpp ) @@ -19,4 +20,6 @@ clangLex clangParse clangSema + + LLVMTestingSupport ) Index: clang/lib/Lex/Lexer.cpp =================================================================== --- clang/lib/Lex/Lexer.cpp +++ clang/lib/Lex/Lexer.cpp @@ -4323,6 +4323,8 @@ while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { if (DepDirectives.front().Kind == pp_eof) return LexEndOfFile(Result, BufferEnd); + if (DepDirectives.front().Kind == tokens_present_before_eof) + MIOpt.ReadToken(); NextDepDirectiveTokenIndex = 0; DepDirectives = DepDirectives.drop_front(); } @@ -4398,6 +4400,7 @@ case cxx_import_decl: case cxx_export_module_decl: case cxx_export_import_decl: + case tokens_present_before_eof: break; case pp_if: case pp_ifdef: Index: clang/lib/Lex/DependencyDirectivesScanner.cpp =================================================================== --- clang/lib/Lex/DependencyDirectivesScanner.cpp +++ clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -87,6 +87,9 @@ dependency_directives_scan::Token &lexIncludeFilename(const char *&First, const char *const End); + void skipLine(const char *&First, const char *const End); + void skipDirective(StringRef Name, const char *&First, const char *const End); + /// Lexes next token and if it is identifier returns its string, otherwise /// it skips the current line and returns \p None. /// @@ -150,6 +153,7 @@ DiagnosticsEngine *Diags; SourceLocation InputSourceLoc; + const char *LastTokenPtr = nullptr; /// Keeps track of the tokens for the currently lexed directive. Once a /// directive is fully lexed and "committed" then the tokens get appended to /// \p Tokens and \p CurDirToks is cleared for the next directive. @@ -364,7 +368,7 @@ return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); } -static void skipLine(const char *&First, const char *const End) { +void Scanner::skipLine(const char *&First, const char *const End) { for (;;) { assert(First <= End); if (First == End) @@ -379,6 +383,7 @@ // Iterate over strings correctly to avoid comments and newlines. if (*First == '"' || (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + LastTokenPtr = First; if (isRawStringLiteral(Start, First)) skipRawString(First, End); else @@ -388,6 +393,7 @@ // Iterate over comments correctly. if (*First != '/' || End - First < 2) { + LastTokenPtr = First; ++First; continue; } @@ -399,6 +405,7 @@ } if (First[1] != '*') { + LastTokenPtr = First; ++First; continue; } @@ -416,8 +423,8 @@ } } -static void skipDirective(StringRef Name, const char *&First, - const char *const End) { +void Scanner::skipDirective(StringRef Name, const char *&First, + const char *const End) { if (llvm::StringSwitch<bool>(Name) .Case("warning", true) .Case("error", true) @@ -710,6 +717,8 @@ return false; } + LastTokenPtr = First; + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); auto ScEx1 = make_scope_exit([&]() { @@ -803,6 +812,9 @@ if (!Error) { // Add an EOF on success. + if (LastTokenPtr && + (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) + pushDirective(tokens_present_before_eof); pushDirective(pp_eof); } @@ -828,7 +840,7 @@ void clang::printDependencyDirectivesAsSource( StringRef Source, ArrayRef<dependency_directives_scan::Directive> Directives, - llvm::raw_ostream &OS) { + llvm::raw_ostream &OS, bool PrintMarkerForTokensBeforeEOF) { // Add a space separator where it is convenient for testing purposes. auto needsSpaceSeparator = [](tok::TokenKind Prev, @@ -851,6 +863,9 @@ }; for (const dependency_directives_scan::Directive &Directive : Directives) { + if (Directive.Kind == tokens_present_before_eof && + PrintMarkerForTokensBeforeEOF) + OS << "<TokBEOF>"; Optional<tok::TokenKind> PrevTokenKind; for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) Index: clang/include/clang/Lex/DependencyDirectivesScanner.h =================================================================== --- clang/include/clang/Lex/DependencyDirectivesScanner.h +++ clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -82,6 +82,9 @@ cxx_import_decl, cxx_export_module_decl, cxx_export_import_decl, + /// Indicates that there are tokens present between the last scanned directive + /// and eof. The \p Directive::Tokens array will be empty for this kind. + tokens_present_before_eof, pp_eof, }; @@ -123,13 +126,16 @@ /// \param Directives The previously scanned dependency /// directives. /// \param OS the stream to print the dependency directives on. +/// \param PrintMarkerForTokensBeforeEOF if true also prints +/// \p dependency_directives_scan::tokens_present_before_eof, otherwise this +/// directive will be ignored. /// /// This is used primarily for testing purposes, during dependency scanning the /// \p Lexer uses the tokens directly, not their printed version. void printDependencyDirectivesAsSource( StringRef Source, ArrayRef<dependency_directives_scan::Directive> Directives, - llvm::raw_ostream &OS); + llvm::raw_ostream &OS, bool PrintMarkerForTokensBeforeEOF = false); } // end namespace clang
_______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
