https://github.com/MythreyaK created https://github.com/llvm/llvm-project/pull/151624
This is currently a draft PR. It changes header insertion so that headers are inserted only in the global module fragment, after skipping comments and any preprocessor directives. From 1c1f7cca259e4da191e4ce6113a955e891c71692 Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti <g...@mythreya.dev> Date: Thu, 31 Jul 2025 19:14:22 -0700 Subject: [PATCH] [libtooling] Insert headers in global module fragment --- .../lib/Tooling/Inclusions/HeaderIncludes.cpp | 129 +++++++++++++----- .../unittests/Tooling/HeaderIncludesTest.cpp | 59 ++++++++ 2 files changed, 153 insertions(+), 35 deletions(-) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index 2b5a293b35841..1e8ec5a88a0ff 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -22,6 +22,8 @@ LangOptions createLangOpts() { LangOpts.CPlusPlus = 1; LangOpts.CPlusPlus11 = 1; LangOpts.CPlusPlus14 = 1; + LangOpts.CPlusPlus20 = 1; + LangOpts.CPlusPlusModules = 1; LangOpts.LineComment = 1; LangOpts.CXXOperatorNames = 1; LangOpts.Bool = 1; @@ -68,53 +70,110 @@ bool checkAndConsumeDirectiveWithName( return Matched; } +bool checkAndConsumeDirective(Lexer &Lex, StringRef Name, Token &Tok) { + bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier) && !Lex.LexFromRawLexer(Tok); + return Matched; +} + +// Check and consume "module;" directive +bool checkAndConsumeModuleDecl(Lexer &Lex, Token &Tok) { + bool Matched = Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "module" + && !Lex.LexFromRawLexer(Tok) + && Tok.is(tok::semi) && Lex.LexFromRawLexer(Tok); + return Matched; +} + void skipComments(Lexer &Lex, Token &Tok) { while (Tok.is(tok::comment)) if (Lex.LexFromRawLexer(Tok)) return; } +// skip to the global module fragment and skip preprocessor ifdefs +unsigned skipToGlobalModuleFragmentAfterPP(StringRef FileName, + StringRef Code, + const IncludeStyle &Style) { + return 
getOffsetAfterTokenSequence(FileName, Code, Style, + [](const SourceManager& SM, Lexer& Lex, Token& Tok) -> unsigned { + // skip all comments at start of file + skipComments(Lex, Tok); + + // skip module fragment + // TODO: handle pragma once + checkAndConsumeModuleDecl(Lex, Tok); + + while(checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { + skipComments(Lex, Tok); + if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) { + skipComments(Lex, Tok); + checkAndConsumeDirective(Lex, "endif", Tok); + } + skipComments(Lex, Tok); + } + + return SM.getFileOffset(Tok.getLocation()); + }); +} + // Returns the offset after header guard directives and any comments // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no // header guard is present in the code, this will return the offset after -// skipping all comments from the start of the code. -unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, +// skipping all comments from the start of the code. Also ensures that the +// offset returned is always in the global module fragment, if modules are in use. +unsigned getMinHeaderInsertOffset(StringRef FileName, StringRef Code, const IncludeStyle &Style) { // \p Consume returns location after header guard or 0 if no header guard is // found. 
- auto ConsumeHeaderGuardAndComment = - [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, - Token Tok)> - Consume) { - return getOffsetAfterTokenSequence( - FileName, Code, Style, - [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { - skipComments(Lex, Tok); - unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); - return std::max(InitialOffset, Consume(SM, Lex, Tok)); - }); - }; - return std::max( - // #ifndef/#define - ConsumeHeaderGuardAndComment( - [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) && - Tok.isAtStartOfLine()) - return SM.getFileOffset(Tok.getLocation()); - } - return 0; - }), - // #pragma once - ConsumeHeaderGuardAndComment( - [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { - if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok, - StringRef("once"))) - return SM.getFileOffset(Tok.getLocation()); - return 0; - })); + // auto ConsumeHeaderGuardAndComment = + // [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, + // Token Tok)> + // Consume) { + // return getOffsetAfterTokenSequence( + // FileName, Code, Style, + // [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { + // skipComments(Lex, Tok); + // unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); + // return std::max(InitialOffset, Consume(SM, Lex, Tok)); + // }); + // }; + + // auto ModuleDecl = ConsumeHeaderGuardAndComment( + // [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { + // skipComments(Lex, Tok); + + // // fprintf(stderr, "before kw_module %d\n", SM.getFileOffset(Tok.getLocation())); + // if (checkAndConsumeModuleDecl(Lex, Tok)) { + // // fprintf(stderr, "after kw_module %d\n", SM.getFileOffset(Tok.getLocation())); + // return SM.getFileOffset(Tok.getLocation()); + // } + // return 0; + // }); + + return 
skipToGlobalModuleFragmentAfterPP(FileName, Code, Style); + + // auto PreprocessorOffset = std::max( + // // #ifndef/#define + // ConsumeHeaderGuardAndComment( + // [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { + // if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { + // skipComments(Lex, Tok); + // if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) && + // Tok.isAtStartOfLine()) + // return SM.getFileOffset(Tok.getLocation()); + // } + // return 0; + // }), + // // #pragma once + // ConsumeHeaderGuardAndComment( + // [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { + // if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok, + // StringRef("once"))) + // return SM.getFileOffset(Tok.getLocation()); + // return 0; + // })); + // return std::max(ModuleDecl, PreprocessorOffset); } // Check if a sequence of tokens is like @@ -281,7 +340,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, const IncludeStyle &Style) : FileName(FileName), Code(Code), FirstIncludeOffset(-1), MinInsertOffset( - getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), + getMinHeaderInsertOffset(FileName, Code, Style)), MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset( FileName, Code.drop_front(MinInsertOffset), Style)), diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp index 929156a11d0d9..c0f7f0f958898 100644 --- a/clang/unittests/Tooling/HeaderIncludesTest.cpp +++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp @@ -594,6 +594,65 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) { EXPECT_EQ(Expected, remove(Code, "\"b.h\"")); } +TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) { + // Ensure header insertions go only in the global module fragment + std::string Code = R"cpp(// comments + +// more comments + +module; +export module foo; + +int main() { + std::vector<int> ints {}; +})cpp"; + std::string Expected = R"cpp(module; +#include 
<vector> +export module foo; + +int main() { + std::vector<int> ints {}; +})cpp"; + + auto InsertedCode = insert(Code, "<vector>"); + fprintf(stderr, "[[\n%s\n]]\n", InsertedCode.c_str()); +} + +TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) { + // Ensure header insertions go only in the global module fragment + std::string Code = R"cpp(// comments + +// more comments + +// some more comments + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// comment + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// more comment + +int main() { + std::vector<int> ints {}; +})cpp"; + std::string Expected = R"cpp(module; +#include <vector> +export module foo; + +int main() { + std::vector<int> ints {}; +})cpp"; + + auto InsertedCode = insert(Code, "<vector>"); + fprintf(stderr, "[[\n%s\n]]\n", InsertedCode.c_str()); +} + } // namespace } // namespace tooling } // namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits