llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-adt Author: Michael Spencer (Bigcheese) <details> <summary>Changes</summary> This separates out parsing of modulemaps from updating the `clang::ModuleMap` information. Currently this has no effect other than slightly changing diagnostics. Upcoming changes will use this to allow searching for modules without fully processing modulemaps. This creates a new `modulemap` namespace because there are too many things called ModuleMap* right now that mean different things. I'd like to clean this up, but I'm not sure yet what I want to call everything. This also drops the `SourceLocation` from `moduleMapFileRead`. This is never used in tree, and in future patches I plan to make the modulemap parser use a different `SourceManager` so that we can share modulemap parsing between `CompilerInstance`s. This will make the `SourceLocation` meaningless. --- Patch is 101.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119740.diff 11 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticLexKinds.td (+2) - (modified) clang/include/clang/Basic/Module.h (+24) - (modified) clang/include/clang/Lex/ModuleMap.h (+1-23) - (added) clang/include/clang/Lex/ModuleMapFile.h (+142) - (modified) clang/lib/Lex/CMakeLists.txt (+1) - (modified) clang/lib/Lex/ModuleMap.cpp (+189-1211) - (added) clang/lib/Lex/ModuleMapFile.cpp (+1248) - (modified) clang/test/Modules/Inputs/export_as_test.modulemap (+4) - (modified) clang/test/Modules/diagnostics.modulemap (+12-9) - (modified) clang/test/Modules/export_as_test.c (+2-3) - (modified) llvm/include/llvm/ADT/STLExtras.h (+9) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 959376b0847216..e5869619e69d35 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -914,6 +914,8 @@ def warn_mmap_redundant_export_as : Warning< InGroup<PrivateModule>; def err_mmap_submodule_export_as : Error< "only top-level modules can be re-exported as public">; +def err_mmap_qualified_export_as : Error< + "a module can only be re-exported as another top-level module">; def warn_quoted_include_in_framework_header : Warning< "double-quoted include \"%0\" in framework header, " diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index dd384c1d76c5fd..ef6e2ac95d0819 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -100,6 +100,30 @@ struct ASTFileSignature : std::array<uint8_t, 20> { } }; +/// The set of attributes that can be attached to a module. +struct ModuleAttributes { + /// Whether this is a system module. + LLVM_PREFERRED_TYPE(bool) + unsigned IsSystem : 1; + + /// Whether this is an extern "C" module. + LLVM_PREFERRED_TYPE(bool) + unsigned IsExternC : 1; + + /// Whether this is an exhaustive set of configuration macros. + LLVM_PREFERRED_TYPE(bool) + unsigned IsExhaustive : 1; + + /// Whether files in this module can only include non-modular headers + /// and headers from used modules. + LLVM_PREFERRED_TYPE(bool) + unsigned NoUndeclaredIncludes : 1; + + ModuleAttributes() + : IsSystem(false), IsExternC(false), IsExhaustive(false), + NoUndeclaredIncludes(false) {} +}; + /// Required to construct a Module. /// /// This tag type is only constructible by ModuleMap, guaranteeing it ownership diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 53e9e0ec83ddb1..9de1b3b546c115 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -232,29 +232,7 @@ class ModuleMap { llvm::DenseMap<Module *, unsigned> ModuleScopeIDs; - /// The set of attributes that can be attached to a module. - struct Attributes { - /// Whether this is a system module. - LLVM_PREFERRED_TYPE(bool) - unsigned IsSystem : 1; - - /// Whether this is an extern "C" module. - LLVM_PREFERRED_TYPE(bool) - unsigned IsExternC : 1; - - /// Whether this is an exhaustive set of configuration macros. - LLVM_PREFERRED_TYPE(bool) - unsigned IsExhaustive : 1; - - /// Whether files in this module can only include non-modular headers - /// and headers from used modules. - LLVM_PREFERRED_TYPE(bool) - unsigned NoUndeclaredIncludes : 1; - - Attributes() - : IsSystem(false), IsExternC(false), IsExhaustive(false), - NoUndeclaredIncludes(false) {} - }; + using Attributes = ModuleAttributes; /// A directory for which framework modules can be inferred. struct InferredDirectory { diff --git a/clang/include/clang/Lex/ModuleMapFile.h b/clang/include/clang/Lex/ModuleMapFile.h new file mode 100644 index 00000000000000..8b79897876ad61 --- /dev/null +++ b/clang/include/clang/Lex/ModuleMapFile.h @@ -0,0 +1,142 @@ +//===- ModuleMapFile.h - Parsing and representation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_MODULEMAPFILE_H +#define LLVM_CLANG_LEX_MODULEMAPFILE_H + +#include "clang/Basic/LLVM.h" +// TODO: Consider moving ModuleId to another header, parsing a modulemap file is +// intended to not depend on anything about the clang::Module class. +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/StringRef.h" + +#include <optional> +#include <variant> + +namespace clang { + +class DiagnosticsEngine; +class SourceManager; + +namespace modulemap { + +using Decl = + std::variant<struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, + struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, + struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, + struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl>; + +struct RequiresFeature { + SourceLocation Location; + StringRef Feature; + bool RequiredState = true; +}; + +struct RequiresDecl { + SourceLocation Location; + std::vector<RequiresFeature> Features; +}; + +struct HeaderDecl { + SourceLocation Location; + StringRef Path; + SourceLocation PathLoc; + std::optional<int64_t> Size; + std::optional<int64_t> MTime; + LLVM_PREFERRED_TYPE(bool) + unsigned Private : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned Textual : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned Umbrella : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned Excluded : 1; +}; + +struct UmbrellaDirDecl { + SourceLocation Location; + StringRef Path; +}; + +struct ModuleDecl { + SourceLocation Location; /// Points to the first keyword in the decl. + ModuleId Id; + ModuleAttributes Attrs; + std::vector<Decl> Decls; + + LLVM_PREFERRED_TYPE(bool) + unsigned Explicit : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned Framework : 1; +}; + +struct ExcludeDecl { + SourceLocation Location; + StringRef Module; +}; + +struct ExportDecl { + SourceLocation Location; + ModuleId Id; + bool Wildcard; +}; + +struct ExportAsDecl { + SourceLocation Location; + ModuleId Id; +}; + +struct ExternModuleDecl { + SourceLocation Location; + ModuleId Id; + StringRef Path; +}; + +struct UseDecl { + SourceLocation Location; + ModuleId Id; +}; + +struct LinkDecl { + SourceLocation Location; + StringRef Library; + LLVM_PREFERRED_TYPE(bool) + unsigned Framework : 1; +}; + +struct ConfigMacrosDecl { + SourceLocation Location; + std::vector<StringRef> Macros; + LLVM_PREFERRED_TYPE(bool) + unsigned Exhaustive : 1; +}; + +struct ConflictDecl { + SourceLocation Location; + ModuleId Id; + StringRef Message; +}; + +using TopLevelDecl = + std::variant<ModuleDecl, ExternModuleDecl>; + +struct ModuleMapFile { + std::vector<TopLevelDecl> Decls; +}; + +std::optional<ModuleMapFile> parseModuleMap(FileEntryRef File, + SourceManager &SM, + DiagnosticsEngine &Diags, + bool IsSystem, unsigned *Offset); +void dumpModuleMapFile(ModuleMapFile &MMF, llvm::raw_ostream &out); + +} // namespace modulemap +} // namespace clang + +#endif diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt index 766336b89a2382..5a049a1d84fd0d 100644 --- a/clang/lib/Lex/CMakeLists.txt +++ b/clang/lib/Lex/CMakeLists.txt @@ -15,6 +15,7 @@ add_clang_library(clangLex MacroArgs.cpp MacroInfo.cpp ModuleMap.cpp + ModuleMapFile.cpp PPCaching.cpp PPCallbacks.cpp PPConditionalDirectiveRecord.cpp diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index ccf94f6345ff28..11b38db9eb6526 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -24,9 +24,7 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/Lexer.h" -#include "clang/Lex/LiteralSupport.h" -#include "clang/Lex/Token.h" +#include "clang/Lex/ModuleMapFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -34,7 +32,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" @@ -1461,81 +1458,10 @@ bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) { //----------------------------------------------------------------------------// namespace clang { - - /// A token in a module map file. - struct MMToken { - enum TokenKind { - Comma, - ConfigMacros, - Conflict, - EndOfFile, - HeaderKeyword, - Identifier, - Exclaim, - ExcludeKeyword, - ExplicitKeyword, - ExportKeyword, - ExportAsKeyword, - ExternKeyword, - FrameworkKeyword, - LinkKeyword, - ModuleKeyword, - Period, - PrivateKeyword, - UmbrellaKeyword, - UseKeyword, - RequiresKeyword, - Star, - StringLiteral, - IntegerLiteral, - TextualKeyword, - LBrace, - RBrace, - LSquare, - RSquare - } Kind; - - SourceLocation::UIntTy Location; - unsigned StringLength; - union { - // If Kind != IntegerLiteral. - const char *StringData; - - // If Kind == IntegerLiteral. - uint64_t IntegerValue; - }; - - void clear() { - Kind = EndOfFile; - Location = 0; - StringLength = 0; - StringData = nullptr; - } - - bool is(TokenKind K) const { return Kind == K; } - - SourceLocation getLocation() const { - return SourceLocation::getFromRawEncoding(Location); - } - - uint64_t getInteger() const { - return Kind == IntegerLiteral ? IntegerValue : 0; - } - - StringRef getString() const { - return Kind == IntegerLiteral ? StringRef() - : StringRef(StringData, StringLength); - } - }; - class ModuleMapParser { - Lexer &L; + modulemap::ModuleMapFile &MMF; SourceManager &SourceMgr; - /// Default target information, used only for string literal - /// parsing. - const TargetInfo *Target; - DiagnosticsEngine &Diags; ModuleMap ⤅ @@ -1555,13 +1481,6 @@ namespace clang { /// Whether an error occurred. bool HadError = false; - /// Stores string data for the various string literals referenced - /// during parsing. - llvm::BumpPtrAllocator StringData; - - /// The current token. - MMToken Tok; - /// The active module. Module *ActiveModule = nullptr; @@ -1575,305 +1494,45 @@ namespace clang { /// 'textual' to match the original intent. llvm::SmallPtrSet<Module *, 2> UsesRequiresExcludedHack; - /// Consume the current token and return its location. - SourceLocation consumeToken(); - - /// Skip tokens until we reach the a token with the given kind - /// (or the end of the file). - void skipUntil(MMToken::TokenKind K); - - bool parseModuleId(ModuleId &Id); - void parseModuleDecl(); - void parseExternModuleDecl(); - void parseRequiresDecl(); - void parseHeaderDecl(MMToken::TokenKind, SourceLocation LeadingLoc); - void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc); - void parseExportDecl(); - void parseExportAsDecl(); - void parseUseDecl(); - void parseLinkDecl(); - void parseConfigMacros(); - void parseConflict(); - void parseInferredModuleDecl(bool Framework, bool Explicit); + void handleModuleDecl(const modulemap::ModuleDecl &MD); + void handleExternModuleDecl(const modulemap::ExternModuleDecl &EMD); + void handleRequiresDecl(const modulemap::RequiresDecl &RD); + void handleHeaderDecl(const modulemap::HeaderDecl &HD); + void handleUmbrellaDirDecl(const modulemap::UmbrellaDirDecl &UDD); + void handleExportDecl(const modulemap::ExportDecl &ED); + void handleExportAsDecl(const modulemap::ExportAsDecl &EAD); + void handleUseDecl(const modulemap::UseDecl &UD); + void handleLinkDecl(const modulemap::LinkDecl &LD); + void handleConfigMacros(const modulemap::ConfigMacrosDecl &CMD); + void handleConflict(const modulemap::ConflictDecl &CD); + void handleInferredModuleDecl(const modulemap::ModuleDecl &MD); /// Private modules are canonicalized as Foo_Private. Clang provides extra /// module map search logic to find the appropriate private module when PCH /// is used with implicit module maps. Warn when private modules are written /// in other ways (FooPrivate and Foo.Private), providing notes and fixits. - void diagnosePrivateModules(SourceLocation ExplicitLoc, - SourceLocation FrameworkLoc); + void diagnosePrivateModules(SourceLocation StartLoc); using Attributes = ModuleMap::Attributes; - bool parseOptionalAttributes(Attributes &Attrs); - public: - ModuleMapParser(Lexer &L, SourceManager &SourceMgr, - const TargetInfo *Target, DiagnosticsEngine &Diags, + ModuleMapParser(modulemap::ModuleMapFile &MMF, + SourceManager &SourceMgr, DiagnosticsEngine &Diags, ModuleMap &Map, FileID ModuleMapFID, DirectoryEntryRef Directory, bool IsSystem) - : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), - ModuleMapFID(ModuleMapFID), Directory(Directory), IsSystem(IsSystem) { - Tok.clear(); - consumeToken(); - } + : MMF(MMF), SourceMgr(SourceMgr), Diags(Diags), Map(Map), + ModuleMapFID(ModuleMapFID), Directory(Directory), IsSystem(IsSystem) {} bool parseModuleMapFile(); - - bool terminatedByDirective() { return false; } - SourceLocation getLocation() { return Tok.getLocation(); } }; } // namespace clang -SourceLocation ModuleMapParser::consumeToken() { - SourceLocation Result = Tok.getLocation(); - -retry: - Tok.clear(); - Token LToken; - L.LexFromRawLexer(LToken); - Tok.Location = LToken.getLocation().getRawEncoding(); - switch (LToken.getKind()) { - case tok::raw_identifier: { - StringRef RI = LToken.getRawIdentifier(); - Tok.StringData = RI.data(); - Tok.StringLength = RI.size(); - Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(RI) - .Case("config_macros", MMToken::ConfigMacros) - .Case("conflict", MMToken::Conflict) - .Case("exclude", MMToken::ExcludeKeyword) - .Case("explicit", MMToken::ExplicitKeyword) - .Case("export", MMToken::ExportKeyword) - .Case("export_as", MMToken::ExportAsKeyword) - .Case("extern", MMToken::ExternKeyword) - .Case("framework", MMToken::FrameworkKeyword) - .Case("header", MMToken::HeaderKeyword) - .Case("link", MMToken::LinkKeyword) - .Case("module", MMToken::ModuleKeyword) - .Case("private", MMToken::PrivateKeyword) - .Case("requires", MMToken::RequiresKeyword) - .Case("textual", MMToken::TextualKeyword) - .Case("umbrella", MMToken::UmbrellaKeyword) - .Case("use", MMToken::UseKeyword) - .Default(MMToken::Identifier); - break; - } - - case tok::comma: - Tok.Kind = MMToken::Comma; - break; - - case tok::eof: - Tok.Kind = MMToken::EndOfFile; - break; - - case tok::l_brace: - Tok.Kind = MMToken::LBrace; - break; - - case tok::l_square: - Tok.Kind = MMToken::LSquare; - break; - - case tok::period: - Tok.Kind = MMToken::Period; - break; - - case tok::r_brace: - Tok.Kind = MMToken::RBrace; - break; - - case tok::r_square: - Tok.Kind = MMToken::RSquare; - break; - - case tok::star: - Tok.Kind = MMToken::Star; - break; - - case tok::exclaim: - Tok.Kind = MMToken::Exclaim; - break; - - case tok::string_literal: { - if (LToken.hasUDSuffix()) { - Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl); - HadError = true; - goto retry; - } - - // Parse the string literal. - LangOptions LangOpts; - StringLiteralParser StringLiteral(LToken, SourceMgr, LangOpts, *Target); - if (StringLiteral.hadError) - goto retry; - - // Copy the string literal into our string data allocator. - unsigned Length = StringLiteral.GetStringLength(); - char *Saved = StringData.Allocate<char>(Length + 1); - memcpy(Saved, StringLiteral.GetString().data(), Length); - Saved[Length] = 0; - - // Form the token. - Tok.Kind = MMToken::StringLiteral; - Tok.StringData = Saved; - Tok.StringLength = Length; - break; - } - - case tok::numeric_constant: { - // We don't support any suffixes or other complications. - SmallString<32> SpellingBuffer; - SpellingBuffer.resize(LToken.getLength() + 1); - const char *Start = SpellingBuffer.data(); - unsigned Length = - Lexer::getSpelling(LToken, Start, SourceMgr, Map.LangOpts); - uint64_t Value; - if (StringRef(Start, Length).getAsInteger(0, Value)) { - Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token); - HadError = true; - goto retry; - } - - Tok.Kind = MMToken::IntegerLiteral; - Tok.IntegerValue = Value; - break; - } - - case tok::comment: - goto retry; - - case tok::hash: - // A module map can be terminated prematurely by - // #pragma clang module contents - // When building the module, we'll treat the rest of the file as the - // contents of the module. - { - auto NextIsIdent = [&](StringRef Str) -> bool { - L.LexFromRawLexer(LToken); - return !LToken.isAtStartOfLine() && LToken.is(tok::raw_identifier) && - LToken.getRawIdentifier() == Str; - }; - if (NextIsIdent("pragma") && NextIsIdent("clang") && - NextIsIdent("module") && NextIsIdent("contents")) { - Tok.Kind = MMToken::EndOfFile; - break; - } - } - [[fallthrough]]; - - default: - Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token); - HadError = true; - goto retry; - } - - return Result; -} - -void ModuleMapParser::skipUntil(MMToken::TokenKind K) { - unsigned braceDepth = 0; - unsigned squareDepth = 0; - do { - switch (Tok.Kind) { - case MMToken::EndOfFile: - return; - - case MMToken::LBrace: - if (Tok.is(K) && braceDepth == 0 && squareDepth == 0) - return; - - ++braceDepth; - break; - - case MMToken::LSquare: - if (Tok.is(K) && braceDepth == 0 && squareDepth == 0) - return; - - ++squareDepth; - break; - - case MMToken::RBrace: - if (braceDepth > 0) - --braceDepth; - else if (Tok.is(K)) - return; - break; - - case MMToken::RSquare: - if (squareDepth > 0) - --squareDepth; - else if (Tok.is(K)) - return; - break; - - default: - if (braceDepth == 0 && squareDepth == 0 && Tok.is(K)) - return; - break; - } - - consumeToken(); - } while (true); -} - -/// Parse a module-id. -/// -/// module-id: -/// identifier -/// identifier '.' module-id -/// -/// \returns true if an error occurred, false otherwise. -bool ModuleMapParser::parseModuleId(ModuleId &Id) { - Id.clear(); - do { - if (Tok.is(MMToken::Identifier) || Tok.is(MMToken::StringLiteral)) { - Id.push_back( - std::make_pair(std::string(Tok.getString()), Tok.getLocation())); - consumeToken(); - } else { - Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module_name); - return true; - } - - if (!Tok.is(MMToken::Period)) - break; - - consumeToken(); - } while (true); - - return false; -} - -namespace { - - /// Enumerates the known attributes. - enum AttributeKind { - /// An unknown attribute. - AT_unknown, - - /// The 'system' attribute. - AT_system, - - /// The 'extern_c' attribute. - AT_extern_c, - - /// The 'exhaustive' attribute. - AT_exhaustive, - - /// The 'no_undeclared_includes' attribute. - AT_no_undeclared_includes - }; - -} // namespace - /// Private modules are canonicalized as Foo_Private. Clang provides extra /// module map search logic to find the appropriate private module when PCH /// is used with implicit module maps. Warn when private modules are written /// in other ways (FooPrivate and Foo.Private), providing notes and fixits. -void ModuleMapParser::diagnosePrivateModules(SourceLocation ExplicitLoc, - SourceLocation FrameworkLoc) { +void ModuleMapParser::diagnosePrivateModules(SourceLocation StartLoc) { auto GenNoteAndFixIt = [&](StringRef BadName, StringRef Canonical, co... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/119740 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits