https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/87627
>From f4917dcf99664442d262226cd1ce1058646d7a55 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Wed, 3 Apr 2024 17:09:24 -0500 Subject: [PATCH 1/6] [flang][Frontend] Implement printing defined macros via -dM This should work the same way as in clang. --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 5 +- .../flang/Frontend/PreprocessorOptions.h | 3 + flang/include/flang/Parser/parsing.h | 5 + flang/lib/Frontend/CompilerInvocation.cpp | 1 + flang/lib/Frontend/FrontendActions.cpp | 4 +- flang/lib/Parser/parsing.cpp | 18 ++- flang/lib/Parser/preprocessor.cpp | 128 ++++++++++++++++++ flang/lib/Parser/preprocessor.h | 6 + flang/test/Driver/driver-help-hidden.f90 | 1 + flang/test/Driver/driver-help.f90 | 2 + flang/test/Preprocessing/show-macros1.F90 | 14 ++ flang/test/Preprocessing/show-macros2.F90 | 6 + flang/test/Preprocessing/show-macros3.F90 | 10 ++ 14 files changed, 196 insertions(+), 9 deletions(-) create mode 100644 flang/test/Preprocessing/show-macros1.F90 create mode 100644 flang/test/Preprocessing/show-macros2.F90 create mode 100644 flang/test/Preprocessing/show-macros3.F90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c3e90a70925b78..b1ed29cb1cbc44 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1446,7 +1446,7 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]> def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>, HelpText<"Print include directives in -E mode in addition to normal output">, MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>; -def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>, +def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Print macro definitions in -E mode instead of normal output">; def dead__strip : Flag<["-"], "dead_strip">; def dependency_file : Separate<["-"], "dependency-file">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 70daa699e3a949..bfd07addfca811 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -688,7 +688,10 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(TripleStr)); if (isa<PreprocessJobAction>(JA)) { - CmdArgs.push_back("-E"); + CmdArgs.push_back("-E"); + if (Args.getLastArg(options::OPT_dM)) { + CmdArgs.push_back("-dM"); + } } else if (isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) { if (JA.getType() == types::TY_Nothing) { CmdArgs.push_back("-fsyntax-only"); diff --git a/flang/include/flang/Frontend/PreprocessorOptions.h b/flang/include/flang/Frontend/PreprocessorOptions.h index b2e9ac0e963b73..13a91ee9a184f8 100644 --- a/flang/include/flang/Frontend/PreprocessorOptions.h +++ b/flang/include/flang/Frontend/PreprocessorOptions.h @@ -56,6 +56,9 @@ struct PreprocessorOptions { // -fno-reformat: Emit cooked character stream as -E output bool noReformat{false}; + // -dM: Show macro definitions with -dM -E + bool showMacros{false}; + void addMacroDef(llvm::StringRef name) { macros.emplace_back(std::string(name), false); } diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h index e80d8f724ac8f4..14891c44dacafd 100644 --- a/flang/include/flang/Parser/parsing.h +++ b/flang/include/flang/Parser/parsing.h @@ -16,6 +16,7 @@ #include "provenance.h" #include "flang/Common/Fortran-features.h" #include "llvm/Support/raw_ostream.h" +#include <memory> #include <optional> #include <string> #include <utility> @@ -23,6 +24,8 @@ namespace Fortran::parser { +class Preprocessor; + struct Options { Options() {} @@ -59,6 +62,7 @@ class Parsing { const SourceFile *Prescan(const std::string &path, Options); void EmitPreprocessedSource( llvm::raw_ostream &, bool lineDirectives = true) const; + void EmitPreprocessorMacros(llvm::raw_ostream &) const; void DumpCookedChars(llvm::raw_ostream &) const; void DumpProvenance(llvm::raw_ostream &) const; void DumpParsingLog(llvm::raw_ostream &) const; @@ -83,6 +87,7 @@ class Parsing { const char *finalRestingPlace_{nullptr}; std::optional<Program> parseTree_; ParsingLog log_; + std::unique_ptr<Preprocessor> preprocessor_; }; } // namespace Fortran::parser #endif // FORTRAN_PARSER_PARSING_H_ diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index c830c7af2462c9..8ce6ab7baf4812 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -772,6 +772,7 @@ static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat); opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P); + opts.showMacros = args.hasArg(clang::driver::options::OPT_dM); } /// Parses all semantic related arguments and populates the variables diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 849b3c8e4dc027..8f251997ed401b 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -399,7 +399,9 @@ void PrintPreprocessedAction::executeAction() { // Format or dump the prescanner's output CompilerInstance &ci = this->getInstance(); - if (ci.getInvocation().getPreprocessorOpts().noReformat) { + if (ci.getInvocation().getPreprocessorOpts().showMacros) { + ci.getParsing().EmitPreprocessorMacros(outForPP); + } else if (ci.getInvocation().getPreprocessorOpts().noReformat) { ci.getParsing().DumpCookedChars(outForPP); } else { ci.getParsing().EmitPreprocessedSource( diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index a55d33bf6b91d6..ec008be1fcea9d 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -60,20 +60,20 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { } } - Preprocessor preprocessor{allSources}; + preprocessor_ = std::make_unique<Preprocessor>(allSources); if (!options.predefinitions.empty()) { - preprocessor.DefineStandardMacros(); + preprocessor_->DefineStandardMacros(); for (const auto &predef : options.predefinitions) { if (predef.second) { - preprocessor.Define(predef.first, *predef.second); + preprocessor_->Define(predef.first, *predef.second); } else { - preprocessor.Undefine(predef.first); + preprocessor_->Undefine(predef.first); } } } currentCooked_ = &allCooked_.NewCookedSource(); Prescanner prescanner{ - messages_, *currentCooked_, preprocessor, options.features}; + messages_, *currentCooked_, *preprocessor_, options.features}; prescanner.set_fixedForm(options.isFixedForm) .set_fixedFormColumnLimit(options.fixedFormColumns) .AddCompilerDirectiveSentinel("dir$"); @@ -87,7 +87,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { if (options.features.IsEnabled(LanguageFeature::CUDA)) { prescanner.AddCompilerDirectiveSentinel("$cuf"); prescanner.AddCompilerDirectiveSentinel("@cuf"); - preprocessor.Define("_CUDA", "1"); + preprocessor_->Define("_CUDA", "1"); } ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; @@ -107,6 +107,12 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { return sourceFile; } +void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const { + if (preprocessor_) { + preprocessor_->PrintMacros(out); + } +} + void Parsing::EmitPreprocessedSource( llvm::raw_ostream &out, bool lineDirectives) const { const std::string *sourcePath{nullptr}; diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 515b8f62daf9ad..37e357e91d6f45 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -11,6 +11,9 @@ #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cinttypes> @@ -46,6 +49,110 @@ bool Definition::set_isDisabled(bool disable) { return was; } +void Definition::Print( + llvm::raw_ostream &out, llvm::StringRef macroName) const { + if (isDisabled_) { + return; + } + if (!isFunctionLike_) { + // If it's not a function-like macro, then just print the replacement. + out << ' ' << replacement_.ToString(); + return; + } + + // The sequence of characters from which argument names will be created. + static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + + auto couldCollide = [&](llvm::StringRef str) { + return !str.empty() && llvm::all_of(str, [&](char c) { + return charSeq.find(c) != llvm::StringRef::npos; + }); + }; + + // For function-like macros we need to invent valid argument names (they + // are represented as ~A, ~B, ...). These invented names cannot collide + // with any other tokens in the macro definitions. + llvm::SmallSet<std::string, 10> usedNames; + for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { + std::string tok{replacement_.TokenAt(i).ToString()}; + if (tok.empty()) { + continue; + } + // The generated names will only use characters from `charSeq`, so + // collect names that could collide, and ignore others. + if (couldCollide(tok)) { + usedNames.insert(tok); + } + } + if (couldCollide(macroName)) { + usedNames.insert(macroName.str()); + } + + // Given a string that is either empty, or composed from characters + // from `charSeq`, create the next string in the lexicographical + // order. + auto getNextString = [&](llvm::StringRef str) { + if (str.empty()) { + return charSeq.take_front().str(); + } + if (str.back() == charSeq.back()) { + return (llvm::Twine(str) + charSeq.take_front()).str(); + } + size_t idx{charSeq.find(str.back())}; + return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str(); + }; + + // Generate consecutive arg names, until we get one that works + // (i.e. doesn't collide with existing names). Give up after 4096 + // attempts. + auto genArgName = [&](std::string name) { + for (size_t x{0}; x != 4096; ++x) { + name = getNextString(name); + if (!usedNames.contains(name)) + return name; + } + return std::string(); + }; + + std::string nextName; + llvm::SmallVector<std::string> argNames; + for (size_t i{0}; i != argumentCount_; ++i) { + nextName = genArgName(nextName); + if (nextName.empty()) { + out << " // unable to print"; + return; + } + argNames.push_back(nextName); + } + + // Finally, print the macro. + out << '('; + for (size_t i{0}; i != argumentCount_; ++i) { + if (i != 0) { + out << ", "; + } + out << argNames[i]; + } + if (isVariadic_) { + out << ", ..."; + } + out << ") "; + + for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { + std::string tok{replacement_.TokenAt(i).ToString()}; + if (tok.size() >= 2 && tok[0] == '~') { + // This should be an argument name. The `Tokenize` function only + // generates a single character. + size_t idx{static_cast<size_t>(tok[1] - 'A')}; + if (idx < argumentCount_) { + out << argNames[idx]; + continue; + } + } + out << tok; + } +} + static bool IsLegalIdentifierStart(const CharBlock &cpl) { return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); } @@ -713,6 +820,27 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { } } +void Preprocessor::PrintMacros(llvm::raw_ostream &out) const { + // Sort the entries by macro name. + llvm::SmallVector<decltype(definitions_)::const_iterator> entries; + for (auto it{definitions_.begin()}, e{definitions_.end()}; it != e; ++it) { + entries.push_back(it); + } + llvm::sort(entries, [](const auto it1, const auto it2) { + return it1->first.ToString() < it2->first.ToString(); + }); + + for (auto &&it : entries) { + const auto &[name, def]{*it}; + if (def.isDisabled()) { + continue; + } + out << "#define " << name; + def.Print(out, name.ToString()); + out << '\n'; + } +} + CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { names_.push_back(t.ToString()); return {names_.back().data(), names_.back().size()}; diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h index b61f1577727beb..b4177766f81c4b 100644 --- a/flang/lib/Parser/preprocessor.h +++ b/flang/lib/Parser/preprocessor.h @@ -18,6 +18,8 @@ #include "token-sequence.h" #include "flang/Parser/char-block.h" #include "flang/Parser/provenance.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" #include <cstddef> #include <list> #include <stack> @@ -49,6 +51,8 @@ class Definition { TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &); + void Print(llvm::raw_ostream &out, llvm::StringRef macroName = "") const; + private: static TokenSequence Tokenize(const std::vector<std::string> &argNames, const TokenSequence &token, std::size_t firstToken, std::size_t tokens); @@ -89,6 +93,8 @@ class Preprocessor { // Implements a preprocessor directive. void Directive(const TokenSequence &, Prescanner &); + void PrintMacros(llvm::raw_ostream &out) const; + private: enum class IsElseActive { No, Yes }; enum class CanDeadElseAppear { No, Yes }; diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index bf3660d57cbb4f..fd2b0e41e38c56 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -21,6 +21,7 @@ ! CHECK-NEXT: -ccc-print-phases Dump list of actions to perform ! CHECK-NEXT: -cpp Enable predefined and command line preprocessor macros ! CHECK-NEXT: -c Only run preprocess, compile, and assemble steps +! CHECK-NEXT: -dM Print macro definitions in -E mode instead of normal output ! CHECK-NEXT: -dumpmachine Display the compiler's target processor ! CHECK-NEXT: -dumpversion Display the version of the compiler ! CHECK-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted) diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index b4280a454e3128..368cab97d8547f 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -17,6 +17,7 @@ ! HELP-NEXT: -### Print (but do not run) the commands to run for this compilation ! HELP-NEXT: -cpp Enable predefined and command line preprocessor macros ! HELP-NEXT: -c Only run preprocess, compile, and assemble steps +! HELP-NEXT: -dM Print macro definitions in -E mode instead of normal output ! HELP-NEXT: -dumpmachine Display the compiler's target processor ! HELP-NEXT: -dumpversion Display the version of the compiler ! HELP-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted) @@ -152,6 +153,7 @@ ! HELP-FC1-NEXT:OPTIONS: ! HELP-FC1-NEXT: -cpp Enable predefined and command line preprocessor macros ! HELP-FC1-NEXT: --dependent-lib=<value> Add dependent library +! HELP-FC1-NEXT: -dM Print macro definitions in -E mode instead of normal output ! HELP-FC1-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted) ! HELP-FC1-NEXT: -emit-fir Build the parse tree, then lower it to FIR ! HELP-FC1-NEXT: -emit-hlfir Build the parse tree, then lower it to HLFIR diff --git a/flang/test/Preprocessing/show-macros1.F90 b/flang/test/Preprocessing/show-macros1.F90 new file mode 100644 index 00000000000000..8e3d59a7849f70 --- /dev/null +++ b/flang/test/Preprocessing/show-macros1.F90 @@ -0,0 +1,14 @@ +! RUN: %flang -dM -E -o - %s | FileCheck %s + +! Check the default macros. Omit certain ones such as __LINE__ +! or __FILE__, or target-specific ones, like __x86_64__. + +! Macros are printed in the alphabetical order. + +! CHECK: #define __DATE__ +! CHECK: #define __TIME__ +! CHECK: #define __flang__ +! CHECK: #define __flang_major__ +! CHECK: #define __flang_minor__ +! CHECK: #define __flang_patchlevel__ + diff --git a/flang/test/Preprocessing/show-macros2.F90 b/flang/test/Preprocessing/show-macros2.F90 new file mode 100644 index 00000000000000..baf52ba8161f11 --- /dev/null +++ b/flang/test/Preprocessing/show-macros2.F90 @@ -0,0 +1,6 @@ +! RUN: %flang -DFOO -DBAR=FOO -dM -E -o - %s | FileCheck %s + +! Check command line definitions + +! CHECK: #define BAR FOO +! CHECK: #define FOO 1 diff --git a/flang/test/Preprocessing/show-macros3.F90 b/flang/test/Preprocessing/show-macros3.F90 new file mode 100644 index 00000000000000..4b07fcf2f505db --- /dev/null +++ b/flang/test/Preprocessing/show-macros3.F90 @@ -0,0 +1,10 @@ +! RUN: %flang -dM -E -o - %s | FileCheck %s + +! Variadic macro +#define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__) +! CHECK: #define FOO1(A, B, ...) bar(bar(A, B), __VA_ARGS__) + +! Macro parameter names are synthesized, starting from 'A', B', etc. +! Make sure the generated names do not collide with existing identifiers. +#define FOO2(X, Y) (A + X + C + Y) +! CHECK: #define FOO2(B, D) (A + B + C + D) >From 7bab3bea49cdecddf2df2f8c5270b4cc274ab60f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Thu, 4 Apr 2024 11:53:13 -0500 Subject: [PATCH 2/6] Address review comments --- flang/lib/Parser/preprocessor.cpp | 65 ++++++++++++++----------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 37e357e91d6f45..03305024d6932b 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -12,7 +12,6 @@ #include "flang/Parser/characters.h" #include "flang/Parser/message.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -24,6 +23,7 @@ #include <optional> #include <set> #include <utility> +#include <vector> namespace Fortran::parser { @@ -51,9 +51,6 @@ bool Definition::set_isDisabled(bool disable) { void Definition::Print( llvm::raw_ostream &out, llvm::StringRef macroName) const { - if (isDisabled_) { - return; - } if (!isFunctionLike_) { // If it's not a function-like macro, then just print the replacement. out << ' ' << replacement_.ToString(); @@ -63,16 +60,16 @@ void Definition::Print( // The sequence of characters from which argument names will be created. static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; - auto couldCollide = [&](llvm::StringRef str) { + auto couldCollide{[&](llvm::StringRef str) { return !str.empty() && llvm::all_of(str, [&](char c) { return charSeq.find(c) != llvm::StringRef::npos; }); - }; + }}; // For function-like macros we need to invent valid argument names (they // are represented as ~A, ~B, ...). These invented names cannot collide // with any other tokens in the macro definitions. - llvm::SmallSet<std::string, 10> usedNames; + std::set<std::string> usedNames; for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { std::string tok{replacement_.TokenAt(i).ToString()}; if (tok.empty()) { @@ -91,7 +88,7 @@ void Definition::Print( // Given a string that is either empty, or composed from characters // from `charSeq`, create the next string in the lexicographical // order. - auto getNextString = [&](llvm::StringRef str) { + auto getNextString{[&](llvm::StringRef str) { if (str.empty()) { return charSeq.take_front().str(); } @@ -100,22 +97,22 @@ void Definition::Print( } size_t idx{charSeq.find(str.back())}; return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str(); - }; + }}; // Generate consecutive arg names, until we get one that works // (i.e. doesn't collide with existing names). Give up after 4096 // attempts. - auto genArgName = [&](std::string name) { + auto genArgName{[&](std::string name) { for (size_t x{0}; x != 4096; ++x) { name = getNextString(name); - if (!usedNames.contains(name)) + if (usedNames.count(name) == 0) return name; } return std::string(); - }; + }}; std::string nextName; - llvm::SmallVector<std::string> argNames; + std::vector<std::string> argNames; for (size_t i{0}; i != argumentCount_; ++i) { nextName = genArgName(nextName); if (nextName.empty()) { @@ -138,18 +135,22 @@ void Definition::Print( } out << ") "; + auto getArgumentIndex{[&](llvm::StringRef name) -> size_t { + if (name.size() >= 2 && name[0] == '~') { + // `name` should be an argument name. The `Tokenize` function only + // generates a single character. + return static_cast<size_t>(name[1] - 'A'); + } + return argumentCount_; + }}; + for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { std::string tok{replacement_.TokenAt(i).ToString()}; - if (tok.size() >= 2 && tok[0] == '~') { - // This should be an argument name. The `Tokenize` function only - // generates a single character. - size_t idx{static_cast<size_t>(tok[1] - 'A')}; - if (idx < argumentCount_) { - out << argNames[idx]; - continue; - } + if (size_t idx = getArgumentIndex(tok); idx < argumentCount_) { + out << argNames[idx]; + } else { + out << tok; } - out << tok; } } @@ -821,22 +822,16 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { } void Preprocessor::PrintMacros(llvm::raw_ostream &out) const { - // Sort the entries by macro name. - llvm::SmallVector<decltype(definitions_)::const_iterator> entries; - for (auto it{definitions_.begin()}, e{definitions_.end()}; it != e; ++it) { - entries.push_back(it); + // std::set is ordered. Use that to print the macros in an + // alphabetical order. + std::set<std::string> macroNames; + for (const auto &[name, _] : definitions_) { + macroNames.insert(name.ToString()); } - llvm::sort(entries, [](const auto it1, const auto it2) { - return it1->first.ToString() < it2->first.ToString(); - }); - for (auto &&it : entries) { - const auto &[name, def]{*it}; - if (def.isDisabled()) { - continue; - } + for (const std::string &name : macroNames) { out << "#define " << name; - def.Print(out, name.ToString()); + definitions_.at(name).Print(out, name); out << '\n'; } } >From b3993e96744fe0ef15461b38316b28f75fc91b01 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Thu, 4 Apr 2024 13:18:51 -0500 Subject: [PATCH 3/6] Store argument names, remove argument name generation --- flang/lib/Parser/preprocessor.cpp | 105 ++++------------------ flang/lib/Parser/preprocessor.h | 8 +- flang/test/Preprocessing/show-macros3.F90 | 9 +- 3 files changed, 29 insertions(+), 93 deletions(-) diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 03305024d6932b..30f5ff4df8f5d1 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -32,10 +32,9 @@ Definition::Definition( : replacement_{Tokenize({}, repl, firstToken, tokens)} {} Definition::Definition(const std::vector<std::string> &argNames, - const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, - bool isVariadic) - : isFunctionLike_{true}, - argumentCount_(argNames.size()), isVariadic_{isVariadic}, + const TokenSequence &repl, std::size_t firstToken, + std::size_t tokens, bool isVariadic) + : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames}, replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} Definition::Definition(const std::string &predefined, AllSources &sources) @@ -57,97 +56,24 @@ void Definition::Print( return; } - // The sequence of characters from which argument names will be created. - static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + size_t argCount{argumentCount()}; - auto couldCollide{[&](llvm::StringRef str) { - return !str.empty() && llvm::all_of(str, [&](char c) { - return charSeq.find(c) != llvm::StringRef::npos; - }); - }}; - - // For function-like macros we need to invent valid argument names (they - // are represented as ~A, ~B, ...). These invented names cannot collide - // with any other tokens in the macro definitions. - std::set<std::string> usedNames; - for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { - std::string tok{replacement_.TokenAt(i).ToString()}; - if (tok.empty()) { - continue; - } - // The generated names will only use characters from `charSeq`, so - // collect names that could collide, and ignore others. - if (couldCollide(tok)) { - usedNames.insert(tok); - } - } - if (couldCollide(macroName)) { - usedNames.insert(macroName.str()); - } - - // Given a string that is either empty, or composed from characters - // from `charSeq`, create the next string in the lexicographical - // order. - auto getNextString{[&](llvm::StringRef str) { - if (str.empty()) { - return charSeq.take_front().str(); - } - if (str.back() == charSeq.back()) { - return (llvm::Twine(str) + charSeq.take_front()).str(); - } - size_t idx{charSeq.find(str.back())}; - return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str(); - }}; - - // Generate consecutive arg names, until we get one that works - // (i.e. doesn't collide with existing names). Give up after 4096 - // attempts. - auto genArgName{[&](std::string name) { - for (size_t x{0}; x != 4096; ++x) { - name = getNextString(name); - if (usedNames.count(name) == 0) - return name; - } - return std::string(); - }}; - - std::string nextName; - std::vector<std::string> argNames; - for (size_t i{0}; i != argumentCount_; ++i) { - nextName = genArgName(nextName); - if (nextName.empty()) { - out << " // unable to print"; - return; - } - argNames.push_back(nextName); - } - - // Finally, print the macro. out << '('; - for (size_t i{0}; i != argumentCount_; ++i) { + for (size_t i{0}; i != argCount; ++i) { if (i != 0) { out << ", "; } - out << argNames[i]; + out << argNames_[i]; } if (isVariadic_) { out << ", ..."; } out << ") "; - auto getArgumentIndex{[&](llvm::StringRef name) -> size_t { - if (name.size() >= 2 && name[0] == '~') { - // `name` should be an argument name. The `Tokenize` function only - // generates a single character. - return static_cast<size_t>(name[1] - 'A'); - } - return argumentCount_; - }}; - for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { std::string tok{replacement_.TokenAt(i).ToString()}; - if (size_t idx = getArgumentIndex(tok); idx < argumentCount_) { - out << argNames[idx]; + if (size_t idx = getArgumentIndex(tok); idx < argCount) { + out << argNames_[idx]; } else { out << tok; } @@ -181,6 +107,13 @@ TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, return result; } +std::size_t Definition::getArgumentIndex(const CharBlock &token) const { + if (token.size() >= 2 && token[0] == '~') { + return static_cast<size_t>(token[1] - 'A'); + } + return argumentCount(); +} + static TokenSequence Stringify( const TokenSequence &tokens, AllSources &allSources) { TokenSequence result; @@ -267,7 +200,7 @@ TokenSequence Definition::Apply( continue; } if (bytes == 2 && token[0] == '~') { // argument substitution - std::size_t index = token[1] - 'A'; + std::size_t index = getArgumentIndex(token); if (index >= args.size()) { continue; } @@ -310,8 +243,8 @@ TokenSequence Definition::Apply( Provenance commaProvenance{ prescanner.preprocessor().allSources().CompilerInsertionProvenance( ',')}; - for (std::size_t k{argumentCount_}; k < args.size(); ++k) { - if (k > argumentCount_) { + for (std::size_t k{argumentCount()}; k < args.size(); ++k) { + if (k > argumentCount()) { result.Put(","s, commaProvenance); } result.Put(args[k]); @@ -320,7 +253,7 @@ TokenSequence Definition::Apply( j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' && parenthesesNesting == 0) { parenthesesNesting = 1; - skipping = args.size() == argumentCount_; + skipping = args.size() == argumentCount(); ++j; } else { if (parenthesesNesting > 0) { diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h index b4177766f81c4b..8828fe944aab50 100644 --- a/flang/lib/Parser/preprocessor.h +++ b/flang/lib/Parser/preprocessor.h @@ -41,7 +41,7 @@ class Definition { Definition(const std::string &predefined, AllSources &); bool isFunctionLike() const { return isFunctionLike_; } - std::size_t argumentCount() const { return argumentCount_; } + std::size_t argumentCount() const { return argNames_.size(); } bool isVariadic() const { return isVariadic_; } bool isDisabled() const { return isDisabled_; } bool isPredefined() const { return isPredefined_; } @@ -56,12 +56,16 @@ class Definition { private: static TokenSequence Tokenize(const std::vector<std::string> &argNames, const TokenSequence &token, std::size_t firstToken, std::size_t tokens); + // For a given token, return the index of the argument to which the token + // corresponds, or `argumentCount` if the token does not correspond to any + // argument. + std::size_t getArgumentIndex(const CharBlock &token) const; bool isFunctionLike_{false}; - std::size_t argumentCount_{0}; bool isVariadic_{false}; bool isDisabled_{false}; bool isPredefined_{false}; + std::vector<std::string> argNames_; TokenSequence replacement_; }; diff --git a/flang/test/Preprocessing/show-macros3.F90 b/flang/test/Preprocessing/show-macros3.F90 index 4b07fcf2f505db..951a1ec5ba16f4 100644 --- a/flang/test/Preprocessing/show-macros3.F90 +++ b/flang/test/Preprocessing/show-macros3.F90 @@ -2,9 +2,8 @@ ! Variadic macro #define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__) -! CHECK: #define FOO1(A, B, ...) bar(bar(A, B), __VA_ARGS__) +! CHECK: #define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__) -! Macro parameter names are synthesized, starting from 'A', B', etc. -! Make sure the generated names do not collide with existing identifiers. -#define FOO2(X, Y) (A + X + C + Y) -! CHECK: #define FOO2(B, D) (A + B + C + D) +! Macro with an unused parameter +#define FOO2(X, Y, Z) (X + Z) +! CHECK: #define FOO2(X, Y, Z) (X + Z) >From baeaa9d4ce7178b15358472ee6f05b48fce144b0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Thu, 4 Apr 2024 13:27:13 -0500 Subject: [PATCH 4/6] clang-format --- flang/lib/Parser/preprocessor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 30f5ff4df8f5d1..bd725ee2b49bfc 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -32,8 +32,8 @@ Definition::Definition( : replacement_{Tokenize({}, repl, firstToken, tokens)} {} Definition::Definition(const std::vector<std::string> &argNames, - const TokenSequence &repl, std::size_t firstToken, - std::size_t tokens, bool isVariadic) + const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, + bool isVariadic) : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames}, replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} >From 1802510cb73b3a406facf739c40233194732d376 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Thu, 4 Apr 2024 12:08:45 -0500 Subject: [PATCH 5/6] Move preprocessor.h and token-sequence.h to flang/include directory preprocessor.h is moved to make definition of Preprocessor available in parsing.h. token-sequence is moved because preprocessor.h depends on it. --- clang/docs/tools/clang-formatted-files.txt | 4 ++-- flang/{lib => include/flang}/Parser/preprocessor.h | 2 +- flang/{lib => include/flang}/Parser/token-sequence.h | 0 flang/lib/Parser/parsing.cpp | 2 +- flang/lib/Parser/preprocessor.cpp | 3 ++- flang/lib/Parser/prescan.cpp | 4 ++-- flang/lib/Parser/prescan.h | 2 +- flang/lib/Parser/token-sequence.cpp | 3 ++- 8 files changed, 11 insertions(+), 9 deletions(-) rename flang/{lib => include/flang}/Parser/preprocessor.h (99%) rename flang/{lib => include/flang}/Parser/token-sequence.h (100%) diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 70687c23b15e61..8fd4fed25a32a1 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -2147,8 +2147,10 @@ flang/include/flang/Parser/message.h flang/include/flang/Parser/parse-state.h flang/include/flang/Parser/parse-tree-visitor.h flang/include/flang/Parser/parsing.h +flang/include/flang/Parser/preprocessor.h flang/include/flang/Parser/provenance.h flang/include/flang/Parser/source.h +flang/include/flang/Parser/token-sequence.h flang/include/flang/Parser/tools.h flang/include/flang/Parser/unparse.h flang/include/flang/Parser/user-state.h @@ -2319,7 +2321,6 @@ flang/lib/Parser/openmp-parsers.cpp flang/lib/Parser/parse-tree.cpp flang/lib/Parser/parsing.cpp flang/lib/Parser/preprocessor.cpp -flang/lib/Parser/preprocessor.h flang/lib/Parser/prescan.cpp flang/lib/Parser/prescan.h flang/lib/Parser/program-parsers.cpp @@ -2328,7 +2329,6 @@ flang/lib/Parser/source.cpp flang/lib/Parser/stmt-parser.h flang/lib/Parser/token-parsers.h flang/lib/Parser/token-sequence.cpp -flang/lib/Parser/token-sequence.h flang/lib/Parser/tools.cpp flang/lib/Parser/type-parser-implementation.h flang/lib/Parser/type-parsers.h diff --git a/flang/lib/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h similarity index 99% rename from flang/lib/Parser/preprocessor.h rename to flang/include/flang/Parser/preprocessor.h index 8828fe944aab50..f5f94cdf78dc8e 100644 --- a/flang/lib/Parser/preprocessor.h +++ b/flang/include/flang/Parser/preprocessor.h @@ -15,9 +15,9 @@ // performed, so that special compiler command options &/or source file name // extensions for preprocessing will not be necessary. -#include "token-sequence.h" #include "flang/Parser/char-block.h" #include "flang/Parser/provenance.h" +#include "flang/Parser/token-sequence.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include <cstddef> diff --git a/flang/lib/Parser/token-sequence.h b/flang/include/flang/Parser/token-sequence.h similarity index 100% rename from flang/lib/Parser/token-sequence.h rename to flang/include/flang/Parser/token-sequence.h diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index ec008be1fcea9d..39c2fdcab19d55 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "flang/Parser/parsing.h" -#include "preprocessor.h" #include "prescan.h" #include "type-parsers.h" #include "flang/Parser/message.h" +#include "flang/Parser/preprocessor.h" #include "flang/Parser/provenance.h" #include "flang/Parser/source.h" #include "llvm/Support/raw_ostream.h" diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index bd725ee2b49bfc..1c41bdad29584f 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "preprocessor.h" +#include "flang/Parser/preprocessor.h" + #include "prescan.h" #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index e9b23172ed2e28..96db3955299f33 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "prescan.h" -#include "preprocessor.h" -#include "token-sequence.h" #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" +#include "flang/Parser/preprocessor.h" #include "flang/Parser/source.h" +#include "flang/Parser/token-sequence.h" #include "llvm/Support/raw_ostream.h" #include <cstddef> #include <cstring> diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index 7442b5d2263354..581980001bcc23 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -16,11 +16,11 @@ // fixed form character literals on truncated card images, file // inclusion, and driving the Fortran source preprocessor. -#include "token-sequence.h" #include "flang/Common/Fortran-features.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" #include "flang/Parser/provenance.h" +#include "flang/Parser/token-sequence.h" #include <bitset> #include <optional> #include <string> diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp index 799d13a423660c..d0254ecd5aaefc 100644 --- a/flang/lib/Parser/token-sequence.cpp +++ b/flang/lib/Parser/token-sequence.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "token-sequence.h" +#include "flang/Parser/token-sequence.h" + #include "prescan.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" >From 477937569028e0a7f253512d571ae291265ea01c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Thu, 4 Apr 2024 12:35:50 -0500 Subject: [PATCH 6/6] Make Preprocessor not be dynamically allocated --- flang/include/flang/Parser/parsing.h | 6 ++---- flang/lib/Parser/parsing.cpp | 15 ++++++--------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h index 14891c44dacafd..4d329c189cb80e 100644 --- a/flang/include/flang/Parser/parsing.h +++ b/flang/include/flang/Parser/parsing.h @@ -15,8 +15,8 @@ #include "parse-tree.h" #include "provenance.h" #include "flang/Common/Fortran-features.h" +#include "flang/Parser/preprocessor.h" #include "llvm/Support/raw_ostream.h" -#include <memory> #include <optional> #include <string> #include <utility> @@ -24,8 +24,6 @@ namespace Fortran::parser { -class Preprocessor; - struct Options { Options() {} @@ -87,7 +85,7 @@ class Parsing { const char *finalRestingPlace_{nullptr}; std::optional<Program> parseTree_; ParsingLog log_; - std::unique_ptr<Preprocessor> preprocessor_; + Preprocessor preprocessor_{allCooked_.allSources()}; }; } // namespace Fortran::parser #endif // FORTRAN_PARSER_PARSING_H_ diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index 39c2fdcab19d55..43a898ff120c5d 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -60,20 +60,19 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { } } - preprocessor_ = std::make_unique<Preprocessor>(allSources); if (!options.predefinitions.empty()) { - preprocessor_->DefineStandardMacros(); + preprocessor_.DefineStandardMacros(); for (const auto &predef : options.predefinitions) { if (predef.second) { - preprocessor_->Define(predef.first, *predef.second); + preprocessor_.Define(predef.first, *predef.second); } else { - preprocessor_->Undefine(predef.first); + preprocessor_.Undefine(predef.first); } } } currentCooked_ = &allCooked_.NewCookedSource(); Prescanner prescanner{ - messages_, *currentCooked_, *preprocessor_, options.features}; + messages_, *currentCooked_, preprocessor_, options.features}; prescanner.set_fixedForm(options.isFixedForm) .set_fixedFormColumnLimit(options.fixedFormColumns) .AddCompilerDirectiveSentinel("dir$"); @@ -87,7 +86,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { if (options.features.IsEnabled(LanguageFeature::CUDA)) { prescanner.AddCompilerDirectiveSentinel("$cuf"); prescanner.AddCompilerDirectiveSentinel("@cuf"); - preprocessor_->Define("_CUDA", "1"); + preprocessor_.Define("_CUDA", "1"); } ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; @@ -108,9 +107,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { } void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const { - if (preprocessor_) { - preprocessor_->PrintMacros(out); - } + preprocessor_.PrintMacros(out); } void Parsing::EmitPreprocessedSource( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits