llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Junior Rantila (juniorrantila) <details> <summary>Changes</summary> This patch takes the first steps toward C23 #embed support. We can include binary files, but embed-parameter-sequences are not implemented. Adding the embedded file to the -M dependency array is also not implemented. --- Full diff: https://github.com/llvm/llvm-project/pull/76480.diff 8 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticParseKinds.td (+2) - (modified) clang/include/clang/Basic/TokenKinds.def (+3) - (modified) clang/include/clang/Lex/DependencyDirectivesScanner.h (+1) - (modified) clang/include/clang/Lex/Preprocessor.h (+3) - (modified) clang/lib/Basic/IdentifierTable.cpp (+1) - (modified) clang/lib/Lex/DependencyDirectivesScanner.cpp (+12) - (modified) clang/lib/Lex/Lexer.cpp (+1) - (modified) clang/lib/Lex/PPDirectives.cpp (+217-4) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index e4b1069cde1850..56258df192f9ff 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -165,6 +165,8 @@ def ext_c99_feature : Extension< "'%0' is a C99 extension">, InGroup<C99>; def ext_c11_feature : Extension< "'%0' is a C11 extension">, InGroup<C11>; +def ext_c23_feature : Extension< + "'%0' is a C23 extension">, InGroup<C23>; def warn_c23_compat_keyword : Warning< "'%0' is incompatible with C standards before C23">, InGroup<CPre23Compat>, DefaultIgnore; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3f0e1e1a7d45ad..591684c004f908 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -113,6 +113,9 @@ PPKEYWORD(defined) PPKEYWORD(include) PPKEYWORD(__include_macros) +// C23 6.10.2 - Binary resource inclusion +PPKEYWORD(embed) + // C99 6.10.3 - Macro Replacement. PPKEYWORD(define) PPKEYWORD(undef) diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h index 0e115906fbfe51..b00b9391d0074a 100644 --- a/clang/include/clang/Lex/DependencyDirectivesScanner.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -70,6 +70,7 @@ enum DirectiveKind : uint8_t { pp_pragma_include_alias, pp_pragma_system_header, pp_include_next, + pp_embed, pp_if, pp_ifdef, pp_ifndef, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4ec21a8b6be2c8..4ff097eae78571 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2689,6 +2689,9 @@ class Preprocessor { void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); void HandleImportDirective(SourceLocation HashLoc, Token &Tok); void HandleMicrosoftImportDirective(Token &Tok); + void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, + ConstSearchDirIterator LookupFrom = nullptr, + const FileEntry *LookupFromFile = nullptr); public: /// Check that the given module is available, producing a diagnostic if not. diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 5902c6dc3ce0b4..50cf1925acf49e 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -446,6 +446,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 5, 'i', 'e', ident); CASE( 5, 'i', 'd', ifdef); CASE( 5, 'u', 'd', undef); + CASE( 5, 'e', 'b', embed); CASE( 6, 'a', 's', assert); CASE( 6, 'd', 'f', define); diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 980f865cf24c97..867614cdb27167 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -91,6 +91,9 @@ struct Scanner { dependency_directives_scan::Token &lexIncludeFilename(const char *&First, const char *const End); + dependency_directives_scan::Token &lexEmbedFilename(const char *&First, + const char *const End); + void skipLine(const char *&First, const char *const End); void skipDirective(StringRef Name, const char *&First, const char *const End); @@ -541,6 +544,11 @@ Scanner::lexIncludeFilename(const char *&First, const char *const End) { return CurDirToks.back(); } +dependency_directives_scan::Token & +Scanner::lexEmbedFilename(const char *&First, const char *const End) { + return lexIncludeFilename(First, End); +} + void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { while (true) { const dependency_directives_scan::Token &Tok = lexToken(First, End); @@ -875,6 +883,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { auto Kind = llvm::StringSwitch<DirectiveKind>(Id) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) + .Case("embed", pp_embed) .Case("define", pp_define) .Case("undef", pp_undef) .Case("import", pp_import) @@ -903,6 +912,9 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { case pp_import: lexIncludeFilename(First, End); break; + case pp_embed: + lexEmbedFilename(First, End); + break; default: break; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 50b56265f6e164..527180daa5f9aa 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -4541,6 +4541,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { llvm_unreachable("unexpected 'pp_none'"); case pp_include: case pp___include_macros: + case pp_embed: case pp_define: case pp_undef: case pp_import: diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 9f82a6d073e3ba..31bbc9ea1beeb9 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/CharInfo.h" +#include "clang/Basic/DiagnosticLex.h" +#include "clang/Basic/DiagnosticParse.h" #include "clang/Basic/DirectoryEntry.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" @@ -20,10 +22,9 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Frontend/Utils.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" -#include "clang/Lex/HeaderSearchOptions.h" -#include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleLoader.h" @@ -39,16 +40,15 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include "llvm/Support/SaveAndRestore.h" #include <algorithm> #include <cassert> #include <cstring> -#include <new> #include <optional> #include <string> #include <utility> @@ -1242,6 +1242,11 @@ void Preprocessor::HandleDirective(Token &Result) { // Handle -imacros. return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result); + // C23 6.10.2 - Binary resource inclusion + case tok::pp_embed: + // Handle #embed. + return HandleEmbedDirective(SavedHash.getLocation(), Result); + // C99 6.10.3 - Macro Replacement. case tok::pp_define: return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef); @@ -2014,6 +2019,214 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } } +void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, + ConstSearchDirIterator LookupFrom, + const FileEntry *LookupFromFile) { + if (!getLangOpts().C23) { + Diag(EmbedTok, diag::ext_c23_feature) + << EmbedTok.getIdentifierInfo()->getNameStart(); + } + + Token FilenameTok; + if (LexHeaderName(FilenameTok)) + return; + + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + if (FilenameTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(); + return; + } + + // FIXME: Add support for embed parameter sequence. + CheckEndOfDirective(EmbedTok.getIdentifierInfo()->getNameStart()); + + SmallString<128> FilenameBuffer; + StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); + SourceLocation CharEnd = FilenameTok.getEndLoc(); + + CharSourceRange FilenameRange = + CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd); + bool isAngled = + GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (Filename.empty()) + return; + + // Search include directories. + bool IsMapped = false; + bool IsFrameworkFound = false; + ConstSearchDirIterator CurDir = nullptr; + SmallString<1024> SearchPath; + SmallString<1024> RelativePath; + // We get the raw path only if we have 'Callbacks' to which we later pass + // the path. + ModuleMap::KnownHeader SuggestedModule; + SourceLocation FilenameLoc = FilenameTok.getLocation(); + StringRef LookupFilename = Filename; + + // Normalize slashes when compiling with -fms-extensions on non-Windows. This + // is unnecessary on Windows since the filesystem there handles backslashes. + SmallString<128> NormalizedPath; + llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native; + if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) { + NormalizedPath = Filename.str(); + llvm::sys::path::native(NormalizedPath); + LookupFilename = NormalizedPath; + BackslashStyle = llvm::sys::path::Style::windows; + } + + OptionalFileEntryRef File = LookupHeaderIncludeOrImport( + &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok, + IsFrameworkFound, false, IsMapped, LookupFrom, LookupFromFile, + LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled); + if (!File) + return; + + // The #embed file will be considered to be a system header if either it is + // in a system include directory, or if the #embeder is a system include + // header. + SrcMgr::CharacteristicKind FileCharacter = HeaderInfo.getFileDirFlavor(*File); + + // Issue a diagnostic if the name of the file on disk has a different case + // than the one we're about to open. + const bool CheckIncludePathPortability = + !IsMapped && !File->getFileEntry().tryGetRealPathName().empty(); + + if (CheckIncludePathPortability) { + StringRef Name = LookupFilename; + StringRef NameWithoriginalSlashes = Filename; +#if defined(_WIN32) + // Skip UNC prefix if present. (tryGetRealPathName() always + // returns a path with the prefix skipped.) + bool NameWasUNC = Name.consume_front("\\\\?\\"); + NameWithoriginalSlashes.consume_front("\\\\?\\"); +#endif + StringRef RealPathName = File->getFileEntry().tryGetRealPathName(); + SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name), + llvm::sys::path::end(Name)); +#if defined(_WIN32) + // -Wnonportable-include-path is designed to diagnose includes using + // case even on systems with a case-insensitive file system. + // On Windows, RealPathName always starts with an upper-case drive + // letter for absolute paths, but Name might start with either + // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell. + // ("foo" will always have on-disk case, no matter which case was + // used in the cd command). To not emit this warning solely for + // the drive letter, whose case is dependent on if `cd` is used + // with upper- or lower-case drive letters, always consider the + // given drive letter case as correct for the purpose of this warning. + SmallString<128> FixedDriveRealPath; + if (llvm::sys::path::is_absolute(Name) && + llvm::sys::path::is_absolute(RealPathName) && + toLowercase(Name[0]) == toLowercase(RealPathName[0]) && + isLowercase(Name[0]) != isLowercase(RealPathName[0])) { + assert(Components.size() >= 3 && "should have drive, backslash, name"); + assert(Components[0].size() == 2 && "should start with drive"); + assert(Components[0][1] == ':' && "should have colon"); + FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str(); + RealPathName = FixedDriveRealPath; + } +#endif + + if (trySimplifyPath(Components, RealPathName, BackslashStyle)) { + SmallString<128> Path; + Path.reserve(Name.size() + 2); + Path.push_back(isAngled ? '<' : '"'); + + const auto IsSep = [BackslashStyle](char c) { + return llvm::sys::path::is_separator(c, BackslashStyle); + }; + + for (auto Component : Components) { + // On POSIX, Components will contain a single '/' as first element + // exactly if Name is an absolute path. + // On Windows, it will contain "C:" followed by '\' for absolute paths. + // The drive letter is optional for absolute paths on Windows, but + // clang currently cannot process absolute paths in #embed lines that + // don't have a drive. + // If the first entry in Components is a directory separator, + // then the code at the bottom of this loop that keeps the original + // directory separator style copies it. If the second entry is + // a directory separator (the C:\ case), then that separator already + // got copied when the C: was processed and we want to skip that entry. + if (!(Component.size() == 1 && IsSep(Component[0]))) + Path.append(Component); + else if (Path.size() != 1) + continue; + + // Append the separator(s) the user used, or the close quote + if (Path.size() > NameWithoriginalSlashes.size()) { + Path.push_back(isAngled ? '>' : '"'); + continue; + } + assert(IsSep(NameWithoriginalSlashes[Path.size() - 1])); + do + Path.push_back(NameWithoriginalSlashes[Path.size() - 1]); + while (Path.size() <= NameWithoriginalSlashes.size() && + IsSep(NameWithoriginalSlashes[Path.size() - 1])); + } + +#if defined(_WIN32) + // Restore UNC prefix if it was there. + if (NameWasUNC) + Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str(); +#endif + + // For user files and known standard headers, issue a diagnostic. + // For other system headers, don't. They can be controlled separately. + auto DiagId = + (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) + ? diag::pp_nonportable_path + : diag::pp_nonportable_system_path; + Diag(FilenameTok, DiagId) + << Path << FixItHint::CreateReplacement(FilenameRange, Path); + } + } + + // Look up the file, create a File ID for it. + SourceLocation EmbedPos = FilenameTok.getLocation(); + // If the filename string was the result of macro expansions, set the embed + // position on the file where it will be embedded and after the expansions. + if (EmbedPos.isMacroID()) + EmbedPos = SourceMgr.getExpansionRange(EmbedPos).getEnd(); + FileID FID = SourceMgr.createFileID(*File, EmbedPos, FileCharacter); + if (!FID.isValid()) { + TheModuleLoader.HadFatalFailure = true; + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + TheModuleLoader.HadFatalFailure = true; + EmbedTok.setKind(tok::eof); + CurLexer->cutOffLexing(); + return; + } + + const auto FileBuffer = getFileManager().getBufferForFile(*File); + if (!FileBuffer) + return; + const auto Buffer = FileBuffer.get()->getBuffer(); + if (Buffer.empty()) + return; + + std::string ProcessedBuffer = R"( + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wc++11-narrowing" + )"; + for (size_t i = 0; i < Buffer.size(); ++i) { + auto c = Buffer[i]; + ProcessedBuffer += "0x" + llvm::toHex(c) + ","; + if ((i + 1) % 16 == 0) + ProcessedBuffer += "\n"; + } + ProcessedBuffer += "\n#pragma clang diagnostic pop\n"; + llvm::MemoryBufferRef Buf( + StringRef(ProcessedBuffer).copy(getPreprocessorAllocator()), Filename); + EnterSourceFile(SourceMgr.createFileID(Buf), CurDir, + FilenameTok.getLocation()); +} + OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport( ConstSearchDirIterator *CurDir, StringRef &Filename, SourceLocation FilenameLoc, CharSourceRange FilenameRange, `````````` </details> https://github.com/llvm/llvm-project/pull/76480 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits